[thirdparty/gcc.git] / libcpp / ucnid.pl

#! /usr/bin/perl -w
use strict;

# Convert cppucnid.tab to cppucnid.h.  We use two arrays of length
# 65536 to represent the table, since this is nice and simple.  The
# first array holds the tags indicating which ranges are valid in
# which contexts.  The second array holds the language name associated
# with each element.

# Per-code-point tables, indexed by code point 0x0000..0xffff.  Every
# slot starts out as the empty string; print_table skips code points
# whose tag is still empty (they are not valid identifier characters).
our(@tags, @names);
@tags = ("") x 65536;
@names = ("") x 65536;


# Array mapping tag numbers to standard #defines
# NOTE(review): @stds is declared here but never read or written in the
# visible part of this script -- presumably a leftover; confirm before
# removing.
our @stds;

# Current standard and language: set from the most recent "[std]" and
# "; language" header lines of the table section, and applied by
# process_range to every code point it records.
our($curstd, $curlang);

# First block of the file is a template to be saved for later.
# It is replayed line by line once the tables have been read.
our @template;

# Collect template lines up to (but not including) the "%%" separator.
# The separator line itself is consumed, so the next read from <> starts
# at the first line of the table section.
while (defined(my $template_line = <>)) {
    chomp $template_line;
    last if $template_line eq '%%';
    push @template, $template_line;
}

# Second block of the file is the UCN tables.
# The format looks like this:
#
# [std]
#
# ; language
# xxxx-xxxx xxxx xxxx-xxxx ....
#
# with comment lines starting with #.

while (defined(my $line = <>)) {
    chomp $line;

    # Comment lines and blank (or all-whitespace) lines carry no data.
    next if $line =~ /^#/;
    next if $line =~ /^\s*$/;

    if ($line =~ /^\[(.+)\]$/) {
        # "[std]" header: selects the standard for the ranges that follow.
        $curstd = $1;
    }
    elsif ($line =~ /^; (.+)$/) {
        # "; language" header: selects the language for the ranges that follow.
        $curlang = $1;
    }
    else {
        # Anything else is a whitespace-separated list of code points/ranges.
        process_range(split ' ', $line);
    }
}

# Replay the saved template, expanding its two placeholder lines:
# "[dne]" becomes the do-not-edit banner and "[table]" becomes the
# generated range table.  All other lines are copied through as-is.
$\ = "\n";    # make print (not printf) terminate each line it writes
for my $template_line (@template) {
    if ($template_line eq "[dne]") {
        print "/* Automatically generated from cppucnid.tab, do not edit */";
    }
    elsif ($template_line eq "[table]") {
        print_table();
    }
    else {
        print $template_line;
    }
}

# Write the range table to standard output as C initializer rows.
#
# @tags/@names are scanned for maximal runs of consecutive code points
# that share both the same tag string and the same language name.  Each
# run with a non-empty tag yields one row "{ first, last, tag }"; the
# language name is appended as a C comment only when it differs from the
# language of the previously printed row.  Takes no arguments and
# returns nothing useful.
sub print_table {
    my $final_index   = $#tags;
    my $previous_lang = "";
    my $begin         = 0;

    while ($begin <= $final_index) {
        my ($run_tag, $run_lang) = ($tags[$begin], $names[$begin]);

        # Advance $past to the first index that no longer belongs to the
        # run starting at $begin (the run always contains $begin itself).
        my $past = $begin + 1;
        while ($past <= $final_index
               && $tags[$past] eq $run_tag
               && $names[$past] eq $run_lang) {
            $past++;
        }

        # Runs with an empty tag are not valid identifier characters and
        # produce no row (and do not affect the remembered language).
        if ($run_tag ne "") {
            # Tags starting with C99 get four leading spaces --
            # presumably to line them up with longer combined tags.
            my $cell = $run_tag =~ /^C99/ ? "    " . $run_tag : $run_tag;
            my @row  = ($begin, $past - 1, $cell);

            if ($run_lang eq $previous_lang) {
                printf("  { 0x%04x, 0x%04x, %-11s },\n", @row);
            }
            else {
                printf("  { 0x%04x, 0x%04x, %-11s },  /* %s */\n",
                       @row, $run_lang);
            }
            $previous_lang = $run_lang;
        }

        $begin = $past;
    }
}

# Record every code point named on one data line of the table.
#
# Arguments: the whitespace-separated fields of the line.  Each field is
# either a single four-digit lowercase hexadecimal code point ("00aa")
# or an inclusive pair of them joined by a hyphen ("00c0-00d6").  Each
# is a valid identifier character from the current language ($curlang),
# under the current standard ($curstd).
#
# For each code point, $curstd is prepended to its entry in @tags
# (joined with "|" when an entry already exists) and $curlang is stored
# in @names.  If the code point already belongs to a different language,
# a warning is issued and the earlier language name is kept; the tag is
# still updated.  Fields that match neither form, and pairs whose start
# is greater than their end, are reported with a warning and skipped.
sub process_range {
    for my $range (@_) {
        my ($first, $last);

        if ($range =~ /^[0-9a-f]{4}$/) {
            # A single code point is handled as a one-element range.
            $first = $last = hex($range);
        } elsif ($range =~ /^ ([0-9a-f]{4}) - ([0-9a-f]{4}) $/x) {
            ($first, $last) = (hex($1), hex($2));
            # A reversed pair covers nothing; say so instead of
            # dropping it silently, since it is most likely a typo.
            warn "reversed range expression $range" if $first > $last;
        } else {
            warn "malformed range expression $range";
            next;
        }

        for my $i ($first .. $last) {
            $tags[$i] = $tags[$i] eq ""
                ? $curstd
                : $curstd . "|" . $tags[$i];

            if ($names[$i] ne "" && $names[$i] ne $curlang) {
                # The tag is a string such as "C99|CXX", so it must be
                # formatted with %s; %d would print 0 and trigger a
                # "isn't numeric" warning under -w.
                warn sprintf ("language overlap: %s/%s at %x (tag %s)",
                              $names[$i], $curlang, $i, $tags[$i]);
                next;
            }
            $names[$i] = $curlang;
        }
    }
}
Commit	Line	Data
e6cc3a24 ZW	1	#! /usr/bin/perl -w
	2	use strict;
	3
	4	# Convert cppucnid.tab to cppucnid.h. We use two arrays of length
	5	# 65536 to represent the table, since this is nice and simple. The
	6	# first array holds the tags indicating which ranges are valid in
	7	# which contexts. The second array holds the language name associated
	8	# with each element.
	9
	10	our(@tags, @names);
	11	@tags = ("") x 65536;
	12	@names = ("") x 65536;
	13
	14
	15	# Array mapping tag numbers to standard #defines
	16	our @stds;
	17
	18	# Current standard and language
	19	our($curstd, $curlang);
	20
	21	# First block of the file is a template to be saved for later.
	22	our @template;
	23
	24	while (<>) {
	25	chomp;
	26	last if $_ eq '%%';
	27	push @template, $_;
	28	};
	29
	30	# Second block of the file is the UCN tables.
	31	# The format looks like this:
	32	#
	33	# [std]
	34	#
	35	# ; language
	36	# xxxx-xxxx xxxx xxxx-xxxx ....
	37	#
	38	# with comment lines starting with #.
	39
	40	while (<>) {
	41	chomp;
	42	/^#/ and next;
	43	/^\s*$/ and next;
	44	/^\[(.+)\]$/ and do {
	45	$curstd = $1;
	46	next;
	47	};
	48	/^; (.+)$/ and do {
	49	$curlang = $1;
	50	next;
	51	};
	52
	53	process_range(split);
	54	}
	55
	56	# Print out the template, inserting as requested.
	57	$\ = "\n";
	58	for (@template) {
	59	print("/* Automatically generated from cppucnid.tab, do not edit */"),
	60	next if $_ eq "[dne]";
	61	print_table(), next if $_ eq "[table]";
	62	print;
	63	}
	64
65	sub print_table {
66	my($lo, $hi);
67	my $prevname = "";
68
69	for ($lo = 0; $lo <= $#tags; $lo = $hi) {
70	$hi = $lo;
71	$hi++ while $hi <= $#tags
72	&& $tags[$hi] eq $tags[$lo]
73	&& $names[$hi] eq $names[$lo];
74
75	# Range from $lo to $hi-1.
76	# Don't make entries for ranges that are not valid idchars.
77	next if ($tags[$lo] eq "");
78	my $tag = $tags[$lo];
79	$tag = " ".$tag if $tag =~ /^C99/;
80
81	if ($names[$lo] eq $prevname) {
82	printf(" { 0x%04x, 0x%04x, %-11s },\n",
83	$lo, $hi-1, $tag);
84	} else {
85	printf(" { 0x%04x, 0x%04x, %-11s }, /* %s */\n",
86	$lo, $hi-1, $tag, $names[$lo]);
87	}
88	$prevname = $names[$lo];
89	}
90	}
91
92	# The line is a list of four-digit hexadecimal numbers or
93	# pairs of such numbers. Each is a valid identifier character
94	# from the given language, under the given standard.
95	sub process_range {
96	for my $range (@_) {
97	if ($range =~ /^[0-9a-f]{4}$/) {
98	my $i = hex($range);
99	if ($tags[$i] eq "") {
100	$tags[$i] = $curstd;
101	} else {
102	$tags[$i] = $curstd . "\|" . $tags[$i];
103	}
104	if ($names[$i] ne "" && $names[$i] ne $curlang) {
105	warn sprintf ("language overlap: %s/%s at %x (tag %d)",
106	$names[$i], $curlang, $i, $tags[$i]);
107	next;
108	}
109	$names[$i] = $curlang;
110	} elsif ($range =~ /^ ([0-9a-f]{4}) - ([0-9a-f]{4}) $/x) {
111	my ($start, $end) = (hex($1), hex($2));
112	my $i;
113	for ($i = $start; $i <= $end; $i++) {
114	if ($tags[$i] eq "") {
115	$tags[$i] = $curstd;
116	} else {
117	$tags[$i] = $curstd . "\|" . $tags[$i];
118	}
119	if ($names[$i] ne "" && $names[$i] ne $curlang) {
120	warn sprintf ("language overlap: %s/%s at %x (tag %d)",
121	$names[$i], $curlang, $i, $tags[$i]);
122	next;
123	}
124	$names[$i] = $curlang;
125	}
126	} else {
127	warn "malformed range expression $range";
128	}
129	}
130	}