git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
unichar: Optimized 8bit character conversions.
author Timo Sirainen <tss@iki.fi>
Sun, 28 Feb 2010 12:13:30 +0000 (14:13 +0200)
committer Timo Sirainen <tss@iki.fi>
Sun, 28 Feb 2010 12:13:30 +0000 (14:13 +0200)
--HG--
branch : HEAD

src/lib/unichar.c
src/lib/unicodemap.pl

index 21585662906848a00995b9f2bd9c9b11f31bb3db..8148173432345d3ee8ce4f5a297487d32002e9a5 100644 (file)
@@ -187,7 +187,9 @@ unichar_t uni_ucs4_to_titlecase(unichar_t chr)
 {
        unsigned int idx;
 
-       if (chr <= 0xffff) {
+       if (chr <= 0xff)
+               return titlecase8_map[chr];
+       else if (chr <= 0xffff) {
                if (!uint16_find(titlecase16_keys, N_ELEMENTS(titlecase16_keys),
                                 chr, &idx))
                        return chr;
@@ -206,16 +208,21 @@ static bool uni_ucs4_decompose_uni(unichar_t *chr)
 {
        unsigned int idx;
 
-       if (*chr <= 0xffff) {
+       if (*chr <= 0xff) {
+               if (uni8_decomp_map[*chr] == *chr)
+                       return FALSE;
+               *chr = uni8_decomp_map[*chr];
+       } else if (*chr <= 0xffff) {
+               if (*chr < uni16_decomp_keys[0])
+                       return FALSE;
+
                if (!uint16_find(uni16_decomp_keys,
-                                N_ELEMENTS(uni16_decomp_keys),
-                                *chr, &idx))
+                                N_ELEMENTS(uni16_decomp_keys), *chr, &idx))
                        return FALSE;
                *chr = uni16_decomp_values[idx];
        } else {
                if (!uint32_find(uni32_decomp_keys,
-                                N_ELEMENTS(uni32_decomp_keys),
-                                *chr, &idx))
+                                N_ELEMENTS(uni32_decomp_keys), *chr, &idx))
                        return FALSE;
                *chr = uni32_decomp_values[idx];
        }
@@ -247,7 +254,7 @@ static bool uni_ucs4_decompose_multi_utf8(unichar_t chr, buffer_t *output)
        const uint16_t *value;
        unsigned int idx;
 
-       if (chr > 0xffff)
+       if (chr < multidecomp_keys[0] || chr > 0xffff)
                return FALSE;
 
        if (!uint32_find(multidecomp_keys, N_ELEMENTS(multidecomp_keys),
index 89fe9a0d7fdb762727e6f521503b493c702b53be..c18c273df26316d4db6f1101830c2eae9cbadd95 100755 (executable)
@@ -1,6 +1,7 @@
 #!/usr/bin/env perl
 use strict;
 
+my (%titlecase8, %uni8_decomp);
 my (@titlecase16_keys, @titlecase16_values);
 my (@titlecase32_keys, @titlecase32_values);
 my (@uni16_decomp_keys, @uni16_decomp_values);
@@ -18,7 +19,11 @@ while (<>) {
     my $value = eval("0x$titlecode");
     if ($value == $code) { 
       # the same character, ignore
-    } elsif ($code <= 0xffff && $value <= 0xffff) {
+    } elsif ($code <= 0xff) {
+      die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff);
+      $titlecase8{$code} = $value;
+    } elsif ($code <= 0xffff) {
+      die "Error: We've assumed 16bit keys have max. 16bit values" if ($value > 0xffff);
       push @titlecase16_keys, $code;
       push @titlecase16_values, $value;
     } else {
@@ -35,7 +40,9 @@ while (<>) {
        print STDERR "Error: We've assumed decomposition codes are max. 16bit\n";
        exit 1;
       }
-      if ($code <= 0xffff) {
+      if ($code <= 0xff) {
+        $uni8_decomp{$code} = $value;
+      } elsif ($code <= 0xffff) {
        push @uni16_decomp_keys, $code;
        push @uni16_decomp_values, $value;
       } else {
@@ -89,6 +96,23 @@ print "/* This file is automatically generated by unicodemap.pl from UnicodeData
    NOTE: decompositions for characters having titlecase characters
    are not included, because we first translate everything to titlecase */\n";
 
+# Print a 256-entry table for an 8bit map: code points that have no
+# defined entry in the referenced hash are emitted as themselves.
+sub print_map8 {
+  my ($map_ref) = @_;
+  my %mapped_for = %{$map_ref};
+
+  my @table = map {
+    defined($mapped_for{$_}) ? $mapped_for{$_} : $_
+  } 0 .. 0xff;
+
+  print_list(\@table);
+}
+
+print "static const uint16_t titlecase8_map[256] = {\n\t";
+print_map8(\%titlecase8);
+print "\n};\n";
+
 print "static const uint16_t titlecase16_keys[] = {\n\t";
 print_list(\@titlecase16_keys);
 print "\n};\n";
@@ -105,6 +129,10 @@ print "static const uint32_t titlecase32_values[] = {\n\t";
 print_list(\@titlecase32_values);
 print "\n};\n";
 
+print "static const uint16_t uni8_decomp_map[256] = {\n\t";
+print_map8(\%uni8_decomp);
+print "\n};\n";
+
 print "static const uint16_t uni16_decomp_keys[] = {\n\t";
 print_list(\@uni16_decomp_keys);
 print "\n};\n";