git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
load sklearn modules only when training data has changed
author: jonaswinkler <jonas.winkler@jpwinkler.de>
Mon, 15 Feb 2021 10:25:25 +0000 (11:25 +0100)
committer: jonaswinkler <jonas.winkler@jpwinkler.de>
Mon, 15 Feb 2021 10:25:25 +0000 (11:25 +0100)
src/documents/classifier.py

index b577997e3ee69ee01ccf368a247b590a3b3d3a3b..47dd7dfc75e30b564f141b7d1ce7f76b7c2d8450 100755 (executable)
@@ -95,9 +95,6 @@ class DocumentClassifier(object):
             pickle.dump(self.document_type_classifier, f)
 
     def train(self):
-        from sklearn.feature_extraction.text import CountVectorizer
-        from sklearn.neural_network import MLPClassifier
-        from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
 
         data = list()
         labels_tags = list()
@@ -162,6 +159,10 @@ class DocumentClassifier(object):
             )
         )
 
+        from sklearn.feature_extraction.text import CountVectorizer
+        from sklearn.neural_network import MLPClassifier
+        from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
+
         # Step 2: vectorize data
         logger.debug("Vectorizing data...")
         self.data_vectorizer = CountVectorizer(