git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-language: Drop old word break and word boundary lookup implementation
author Stephan Bosch <stephan.bosch@open-xchange.com>
Fri, 21 Mar 2025 18:40:01 +0000 (19:40 +0100)
committer Stephan Bosch <stephan.bosch@open-xchange.com>
Fri, 1 Aug 2025 01:11:19 +0000 (03:11 +0200)
.gitignore
src/lib-language/Makefile.am
src/lib-language/word-properties.pl [deleted file]

index 942de1e7b40333f7383f10c673ab0388ef6509ec..caa6bedfd078d8092381d94b9dd4f7285f2796de 100644 (file)
@@ -105,10 +105,6 @@ src/lib/unicode-data-tables.h
 src/lib/unicode-data-types.c
 src/lib/unicode-data-types.h
 src/lib-compression/bench-compression
-src/lib-language/PropList.txt
-src/lib-language/WordBreakProperty.txt
-src/lib-language/word-boundary-data.c
-src/lib-language/word-break-data.c
 src/lib-dict-backend/dict-drivers-register.c
 src/lib-json/json-format
 src/lib-sql/sql-drivers-register.c
index 5c0e6c7397914390e9e81bbfc8d73992f8afc1f9..49d3b6db700654519e501f6743f0ef3126d85460 100644 (file)
@@ -1,14 +1,10 @@
 noinst_LTLIBRARIES = liblanguage.la
 
-# I$(top_srcdir)/src/lib-language needed to include
-# word-break-data.c and word-boundary-data.c
-# in lang-tokenizer-generic.c
 AM_CPPFLAGS = \
        -I$(top_srcdir)/src/lib \
        -I$(top_srcdir)/src/lib-settings \
        -I$(top_srcdir)/src/lib-test \
        -I$(top_srcdir)/src/lib-mail \
-       -I$(top_srcdir)/src/lib-language \
        -I$(top_srcdir)/src/lib-var-expand \
        $(LIBEXTTEXTCAT_CFLAGS) \
        $(LIBICU_CFLAGS) \
@@ -33,29 +29,10 @@ dist_stopwords_DATA = \
        stopwords/stopwords_sv.txt \
        stopwords/stopwords_tr.txt
 
-BUILT_SOURCES = $(srcdir)/word-boundary-data.c \
-               $(srcdir)/word-break-data.c
-
 EXTRA_DIST = \
        udhr_fra.txt \
-       PropList.txt \
-       word-properties.pl \
-       WordBreakProperty.txt \
-       word-boundary-data.c \
-       word-break-data.c \
        stopwords/stopwords_malformed.txt
 
-$(srcdir)/WordBreakProperty.txt:
-       $(AM_V_at)test -f $@ || $(WGET) -nv -O $@ https://dovecot.org/res/WordBreakProperty.txt
-$(srcdir)/word-boundary-data.c: $(srcdir)/word-properties.pl $(srcdir)/WordBreakProperty.txt
-       $(AM_V_at)$(PERL) $(srcdir)/word-properties.pl boundaries $(srcdir)/WordBreakProperty.txt > $@
-
-$(srcdir)/PropList.txt:
-       $(AM_V_at)test -f $@ || $(WGET) -nv -O $@ https://dovecot.org/res/PropList.txt
-$(srcdir)/word-break-data.c: $(srcdir)/word-properties.pl $(srcdir)/PropList.txt
-       $(AM_V_at)$(PERL) $(srcdir)/word-properties.pl breaks $(srcdir)/PropList.txt > $@
-
-
 if BUILD_LANG_STEMMER
 STEMMER_LIBS = -lstemmer
 endif
diff --git a/src/lib-language/word-properties.pl b/src/lib-language/word-properties.pl
deleted file mode 100644 (file)
index c600d14..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-use warnings;
-
-my @categories;
-my $which = shift(@ARGV);
-if ($which eq 'boundaries') {
-    @categories = qw(CR LF Newline Extend Regional_Indicator Format Katakana Hebrew_Letter ALetter
-                   Single_Quote Double_Quote MidNumLet MidLetter MidNum Numeric ExtendNumLet);
-} elsif ($which eq 'breaks') {
-    @categories = qw(White_Space Dash Quotation_Mark Terminal_Punctuation STerm Pattern_White_Space);
-} else {
-    die "specify 'boundaries' or 'breaks'";
-}
-
-my $catregexp=join('|', @categories);
-my %catlists = map { $_ => []; } (@categories);
-
-while(<>) {
-    next if (m/^#/ or m/^\s*$/);
-    push(@{$catlists{$3}}, defined($2) ? (hex($1)..hex($2)) : hex($1))
-       if (m/([[:xdigit:]]+)(?:\.\.([[:xdigit:]]+))?\s+; ($catregexp) #/)
-}
-
-print "/* This file is automatically generated by word-properties.pl from $ARGV */\n";
-foreach(@categories) {
-    my $arref=$catlists{$_};
-    print "static const uint32_t ${_}[]= {\n";
-    while(scalar(@$arref)) {
-       print("\t", join(", ", map { sprintf("0x%05X", $_); } splice(@$arref, 0, 8)));
-       print(scalar(@$arref) ? ", \n" : "\n");
-    }
-    print("};\n");
-}