From: Stephan Bosch Date: Fri, 21 Mar 2025 15:07:14 +0000 (+0100) Subject: lib: Drop the old Unicode Character Database implementation X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5e1499694fe878318b9880f6bc5ae327844d4174;p=thirdparty%2Fdovecot%2Fcore.git lib: Drop the old Unicode Character Database implementation --- diff --git a/.gitignore b/.gitignore index 84e994bc96..942de1e7b4 100644 --- a/.gitignore +++ b/.gitignore @@ -100,7 +100,6 @@ src/ipc/ipc src/lib/event-filter-lexer.c src/lib/event-filter-parser.c src/lib/event-filter-parser.h -src/lib/unicodemap.c src/lib/unicode-data-tables.c src/lib/unicode-data-tables.h src/lib/unicode-data-types.c diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am index e3927510f9..3d848e37c4 100644 --- a/src/lib/Makefile.am +++ b/src/lib/Makefile.am @@ -3,8 +3,7 @@ AM_CPPFLAGS = \ noinst_LTLIBRARIES = liblib.la -BUILT_SOURCES = $(srcdir)/unicodemap.c \ - $(srcdir)/unicode-data-tables.c \ +BUILT_SOURCES = $(srcdir)/unicode-data-tables.c \ $(srcdir)/unicode-data-tables.h \ $(srcdir)/unicode-data-types.c \ $(srcdir)/unicode-data-types.h \ @@ -20,15 +19,13 @@ UCD_FILES = \ $(UCD_DIR)/UnicodeData.txt EXTRA_DIST = \ - unicodemap.c \ unicode-data-tables.c \ unicode-data-tables.h \ unicode-data-types.c \ unicode-data-types.h \ - unicodemap.pl \ unicode-ucd-compile.py \ $(UCD_FILES) -EXTRA_CLEAN = unicodemap.c unicode-data-tables.c +EXTRA_CLEAN = unicode-data-tables.c # Squelch autoconf error about using .[ly] sources but not defining $(LEX) # and $(YACC). Using false here avoids accidental use. @@ -57,8 +54,6 @@ $(UCD_DIR)/PropertyValueAliases.txt: $(UCD_DIR)/UnicodeData.txt: $(AM_V_at)test -f $@ || $(WGET) -nv -O $@ $(UCD_URL)/UnicodeData.txt -$(srcdir)/unicodemap.c: $(srcdir)/unicodemap.pl $(UCD_DIR)/UnicodeData.txt - $(AM_V_GEN)$(PERL) $(srcdir)/unicodemap.pl < $(UCD_DIR)/UnicodeData.txt > $@ $(srcdir)/unicode-data-tables.c $(srcdir)/unicode-data-tables.h \ $(srcdir)/unicode-data-types.c $(srcdir)/unicode-data-types.h &: \ $(srcdir)/unicode-ucd-compile.py $(UCD_FILES) diff --git a/src/lib/unicodemap.pl b/src/lib/unicodemap.pl deleted file mode 100755 index 2c1bf7ab0c..0000000000 --- a/src/lib/unicodemap.pl +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env perl -use strict; - -my (%titlecase8, %uni8_decomp); -my (@titlecase16_keys, @titlecase16_values); -my (@titlecase32_keys, @titlecase32_values); -my (@uni16_decomp_keys, @uni16_decomp_values); -my (@uni32_decomp_keys, @uni32_decomp_values); -my (@multidecomp_keys, @multidecomp_offsets, @multidecomp_values); -while (<>) { - chomp $_; - my @arr = split(";"); - my $code = eval("0x".$arr[0]); - my $decomp = $arr[5]; - my $titlecode = $arr[14]; - - if ($titlecode ne "") { - # titlecase mapping - my $value = eval("0x$titlecode"); - if ($value == $code) { - # the same character, ignore - } elsif ($code <= 0xff) { - die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff); - $titlecase8{$code} = $value; - } elsif ($code <= 0xffff) { - die "Error: We've assumed 16bit keys have max. 16bit values" if ($value > 0xffff); - push @titlecase16_keys, $code; - push @titlecase16_values, $value; - } else { - push @titlecase32_keys, $code; - push @titlecase32_values, $value; - } - } elsif ($decomp =~ /(?:\<[^>]*> )?(.+)/) { - # decompositions - my $decomp_codes = $1; - if ($decomp_codes =~ /^([0-9A-Z]*)$/i) { - # unicharacter decomposition. use separate lists for this - my $value = eval("0x$1"); - if ($value > 0xffffffff) { - print STDERR "Error: We've assumed decomposition codes are max. 32bit\n"; - exit 1; - } - if ($code <= 0xff) { - $uni8_decomp{$code} = $value; - } elsif ($code <= 0xffff) { - push @uni16_decomp_keys, $code; - push @uni16_decomp_values, $value; - } else { - push @uni32_decomp_keys, $code; - push @uni32_decomp_values, $value; - } - } else { - # multicharacter decomposition. - if ($code > 0xffffffff) { - print STDERR "Error: We've assumed multi-decomposition key codes are max. 32bit\n"; - exit 1; - } - - push @multidecomp_keys, $code; - push @multidecomp_offsets, scalar(@multidecomp_values); - - foreach my $dcode (split(" ", $decomp_codes)) { - my $value = eval("0x$dcode"); - if ($value > 0xffffffff) { - print STDERR "Error: We've assumed decomposition codes are max. 32bit\n"; - exit 1; - } - push @multidecomp_values, $value; - } - push @multidecomp_values, 0; - } - } -} - -sub print_list { - my @list = @{$_[0]}; - - my $last = $#list; - my $n = 0; - foreach my $key (@list) { - printf("0x%05x", $key); - last if ($n == $last); - print ","; - - $n++; - if (($n % 8) == 0) { - print "\n\t"; - } else { - print " "; - } - } -} - -print "/* This file is automatically generated by unicodemap.pl from UnicodeData.txt - - NOTE: decompositions for characters having titlecase characters - are not included, because we first translate everything to titlecase */\n"; - -sub print_map8 { - my %map = %{$_[0]}; - my @list; - for (my $i = 0; $i <= 0xff; $i++) { - if (defined($map{$i})) { - push @list, $map{$i}; - } else { - push @list, $i; - } - } - print_list(\@list); -} - -print "static const uint16_t titlecase8_map[256] = {\n\t"; -print_map8(\%titlecase8); -print "\n};\n"; - -print "static const uint16_t titlecase16_keys[] = {\n\t"; -print_list(\@titlecase16_keys); -print "\n};\n"; - -print "static const uint16_t titlecase16_values[] = {\n\t"; -print_list(\@titlecase16_values); -print "\n};\n"; - -print "static const uint32_t titlecase32_keys[] = {\n\t"; -print_list(\@titlecase32_keys); -print "\n};\n"; - -print "static const uint32_t titlecase32_values[] = {\n\t"; -print_list(\@titlecase32_values); -print "\n};\n"; - -print "static const uint16_t uni8_decomp_map[256] = {\n\t"; -print_map8(\%uni8_decomp); -print "\n};\n"; - -print "static const uint16_t uni16_decomp_keys[] = {\n\t"; -print_list(\@uni16_decomp_keys); -print "\n};\n"; - -print "static const uint32_t uni16_decomp_values[] = {\n\t"; -print_list(\@uni16_decomp_values); -print "\n};\n"; - -print "static const uint32_t uni32_decomp_keys[] = {\n\t"; -print_list(\@uni32_decomp_keys); -print "\n};\n"; - -print "static const uint32_t uni32_decomp_values[] = {\n\t"; -print_list(\@uni32_decomp_values); -print "\n};\n"; - -print "static const uint32_t multidecomp_keys[] = {\n\t"; -print_list(\@multidecomp_keys); -print "\n};\n"; - -print "static const uint16_t multidecomp_offsets[] = {\n\t"; -print_list(\@multidecomp_offsets); -print "\n};\n"; - -print "static const uint32_t multidecomp_values[] = {\n\t"; -print_list(\@multidecomp_values); -print "\n};\n";