return
# Validate the ASN is within the allowed range: zero up to uint32_t max,
# inclusive. Otherwise, Whoosh can't handle it in the index
- if self.override_asn < 0 or self.override_asn > 0xFF_FF_FF_FF:
+ if (
+ self.override_asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
+ or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
+ ):
self._fail(
MESSAGE_ASN_RANGE,
f"Not consuming {self.filename}: "
- f"Given ASN {self.override_asn} is out of range [0, 4,294,967,295]",
+ f"Given ASN {self.override_asn} is out of range "
+ f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
+ f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
)
if Document.objects.filter(archive_serial_number=self.override_asn).exists():
self._fail(
searcher.close()
-def update_document(writer, doc):
+def update_document(writer: AsyncWriter, doc: Document):
tags = ",".join([t.name for t in doc.tags.all()])
tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
+ asn = doc.archive_serial_number
+ # An ASN outside the allowed range is not written to the index:
+ # log an error and index 0 in its place. A missing ASN (None) is
+ # passed through unchanged.
+ if asn is not None and (
+ asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
+ or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
+ ):
+ logger.error(
+ f"Not indexing Archive Serial Number {asn} of document {doc.pk}. "
+ f"ASN is out of range "
+ f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
+ f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}].",
+ )
+ asn = 0
writer.update_document(
id=doc.pk,
title=doc.title,
has_type=doc.document_type is not None,
created=doc.created,
added=doc.added,
- asn=doc.archive_serial_number,
+ asn=asn,
modified=doc.modified,
path=doc.storage_path.name if doc.storage_path else None,
path_id=doc.storage_path.id if doc.storage_path else None,
import os
import re
from collections import OrderedDict
+from typing import Final
from typing import Optional
import dateutil.parser
help_text=_("The original name of the file when it was uploaded"),
)
+ # Inclusive lower and upper bounds for archive_serial_number, shared by
+ # the field validators below. The upper bound is the largest unsigned
+ # 32-bit value.
+ ARCHIVE_SERIAL_NUMBER_MIN: Final[int] = 0
+ ARCHIVE_SERIAL_NUMBER_MAX: Final[int] = 0xFF_FF_FF_FF
+
archive_serial_number = models.PositiveIntegerField(
_("archive serial number"),
blank=True,
unique=True,
db_index=True,
validators=[
- MaxValueValidator(0xFF_FF_FF_FF),
- MinValueValidator(0),
+ MaxValueValidator(ARCHIVE_SERIAL_NUMBER_MAX),
+ MinValueValidator(ARCHIVE_SERIAL_NUMBER_MIN),
],
help_text=_(
"The position of this document in your physical document " "archive.",
+from unittest import mock
+
from django.test import TestCase
from documents import index
from documents.models import Document
)
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
+
+ def test_archive_serial_number_ranging(self):
+ """
+ GIVEN:
+ - Document with an archive serial number above schema allowed size
+ WHEN:
+ - Document is provided to the index
+ THEN:
+ - Error is logged
+ - Document ASN is reset to 0 for the index
+ """
+ out_of_range_asn = Document.ARCHIVE_SERIAL_NUMBER_MAX + 1
+ doc1 = Document.objects.create(
+ title="doc1",
+ checksum="A",
+ content="test test2 test3",
+ # yes, this is allowed, unless full_clean is run
+ # DRF does call the validators, this test won't
+ archive_serial_number=out_of_range_asn,
+ )
+ with self.assertLogs("paperless.index", level="ERROR") as cm:
+ with mock.patch(
+ "documents.index.AsyncWriter.update_document",
+ ) as mocked_update_doc:
+ index.add_or_update_document(doc1)
+
+ mocked_update_doc.assert_called_once()
+ _, kwargs = mocked_update_doc.call_args
+
+ self.assertEqual(kwargs["asn"], 0)
+
+ # Build the expected message from the created document instead of
+ # hard-coding its primary key and ASN: auto-increment keys are not
+ # guaranteed to restart at 1 for every test.
+ error_str = cm.output[0]
+ expected_str = (
+ "ERROR:paperless.index:Not indexing Archive Serial Number "
+ f"{out_of_range_asn} of document {doc1.pk}"
+ )
+ self.assertIn(expected_str, error_str)
+
+ def test_archive_serial_number_is_none(self):
+ """
+ GIVEN:
+ - A document created without any archive serial number
+ WHEN:
+ - The document is added to the index
+ THEN:
+ - The index writer receives None as the ASN
+ """
+ patch_target = "documents.index.AsyncWriter.update_document"
+ document = Document.objects.create(
+ title="doc1",
+ checksum="A",
+ content="test test2 test3",
+ )
+ with mock.patch(patch_target) as update_mock:
+ index.add_or_update_document(document)
+
+ update_mock.assert_called_once()
+ # call_args is an (args, kwargs) pair; only the keywords matter here
+ call_kwargs = update_mock.call_args[1]
+
+ self.assertIsNone(call_kwargs["asn"])