From: Timo Sirainen Date: Sun, 28 Feb 2010 12:13:30 +0000 (+0200) Subject: unichar: Optimized 8bit character conversions. X-Git-Tag: 2.0.beta4~157 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=15f53fe60a33d7450555b5235a7cf7b9c3e7bcfd;p=thirdparty%2Fdovecot%2Fcore.git unichar: Optimized 8bit character conversions. --HG-- branch : HEAD --- diff --git a/src/lib/unichar.c b/src/lib/unichar.c index 2158566290..8148173432 100644 --- a/src/lib/unichar.c +++ b/src/lib/unichar.c @@ -187,7 +187,9 @@ unichar_t uni_ucs4_to_titlecase(unichar_t chr) { unsigned int idx; - if (chr <= 0xffff) { + if (chr <= 0xff) + return titlecase8_map[chr]; + else if (chr <= 0xffff) { if (!uint16_find(titlecase16_keys, N_ELEMENTS(titlecase16_keys), chr, &idx)) return chr; @@ -206,16 +208,21 @@ static bool uni_ucs4_decompose_uni(unichar_t *chr) { unsigned int idx; - if (*chr <= 0xffff) { + if (*chr <= 0xff) { + if (uni8_decomp_map[*chr] == *chr) + return FALSE; + *chr = uni8_decomp_map[*chr]; + } else if (*chr <= 0xffff) { + if (*chr < uni16_decomp_keys[0]) + return FALSE; + if (!uint16_find(uni16_decomp_keys, - N_ELEMENTS(uni16_decomp_keys), - *chr, &idx)) + N_ELEMENTS(uni16_decomp_keys), *chr, &idx)) return FALSE; *chr = uni16_decomp_values[idx]; } else { if (!uint32_find(uni32_decomp_keys, - N_ELEMENTS(uni32_decomp_keys), - *chr, &idx)) + N_ELEMENTS(uni32_decomp_keys), *chr, &idx)) return FALSE; *chr = uni32_decomp_values[idx]; } @@ -247,7 +254,7 @@ static bool uni_ucs4_decompose_multi_utf8(unichar_t chr, buffer_t *output) const uint16_t *value; unsigned int idx; - if (chr > 0xffff) + if (chr < multidecomp_keys[0] || chr > 0xffff) return FALSE; if (!uint32_find(multidecomp_keys, N_ELEMENTS(multidecomp_keys), diff --git a/src/lib/unicodemap.pl b/src/lib/unicodemap.pl index 89fe9a0d7f..c18c273df2 100755 --- a/src/lib/unicodemap.pl +++ b/src/lib/unicodemap.pl @@ -1,6 +1,7 @@ #!/usr/bin/env perl use strict; +my (%titlecase8, %uni8_decomp); my (@titlecase16_keys, @titlecase16_values); my (@titlecase32_keys, @titlecase32_values); my (@uni16_decomp_keys, @uni16_decomp_values); @@ -18,7 +19,11 @@ while (<>) { my $value = eval("0x$titlecode"); if ($value == $code) { # the same character, ignore - } elsif ($code <= 0xffff && $value <= 0xffff) { + } elsif ($code <= 0xff) { + die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff); + $titlecase8{$code} = $value; + } elsif ($code <= 0xffff) { + die "Error: We've assumed 16bit keys have max. 16bit values" if ($value > 0xffff); push @titlecase16_keys, $code; push @titlecase16_values, $value; } else { @@ -35,7 +40,9 @@ while (<>) { print STDERR "Error: We've assumed decomposition codes are max. 16bit\n"; exit 1; } - if ($code <= 0xffff) { + if ($code <= 0xff) { + $uni8_decomp{$code} = $value; + } elsif ($code <= 0xffff) { push @uni16_decomp_keys, $code; push @uni16_decomp_values, $value; } else { @@ -89,6 +96,23 @@ print "/* This file is automatically generated by unicodemap.pl from UnicodeData NOTE: decompositions for characters having titlecase characters are not included, because we first translate everything to titlecase */\n"; +sub print_map8 { + my %map = %{$_[0]}; + my @list; + for (my $i = 0; $i <= 0xff; $i++) { + if (defined($map{$i})) { + push @list, $map{$i}; + } else { + push @list, $i; + } + } + print_list(\@list); +} + +print "static const uint16_t titlecase8_map[256] = {\n\t"; +print_map8(\%titlecase8); +print "\n};\n"; + print "static const uint16_t titlecase16_keys[] = {\n\t"; print_list(\@titlecase16_keys); print "\n};\n"; @@ -105,6 +129,10 @@ print "static const uint32_t titlecase32_values[] = {\n\t"; print_list(\@titlecase32_values); print "\n};\n"; +print "static const uint16_t uni8_decomp_map[256] = {\n\t"; +print_map8(\%uni8_decomp); +print "\n};\n"; + print "static const uint16_t uni16_decomp_keys[] = {\n\t"; print_list(\@uni16_decomp_keys); print "\n};\n";