&& chmod 755 /usr/local/bin/paperless_cmd.sh \
&& mv flower-conditional.sh /usr/local/bin/flower-conditional.sh \
&& chmod 755 /usr/local/bin/flower-conditional.sh \
- && echo "Installing managment commands" \
+ && echo "Installing management commands" \
&& chmod +x install_management_commands.sh \
&& ./install_management_commands.sh
# Docker Compose file for running paperless testing with actual gotenberg
# and Tika containers for a more end-to-end test of the Tika-related functionality
-# Can be used locally or by the CI to start the nessecary containers with the
+# Can be used locally or by the CI to start the necessary containers with the
# correct networking for the tests
version: "3.7"
The collation feature can be used together with the [subdirs as tags](configuration.md#consume_config)
feature (but this is not a requirement). Just create a correctly named double-sided subdir
-in the hierachy and upload your scans there. For example, both `double-sided/foo/bar` as
+in the hierarchy and upload your scans there. For example, both `double-sided/foo/bar` as
well as `foo/bar/double-sided` will cause the collated document to be treated as if it
were uploaded into `foo/bar` and receive both `foo` and `bar` tags, but not `double-sided`.
You might encounter errors such as:
```shell-session
-The following error occured while consuming document.pdf: [Errno 13] Permission denied: '/usr/src/paperless/src/../consume/document.pdf'
+The following error occurred while consuming document.pdf: [Errno 13] Permission denied: '/usr/src/paperless/src/../consume/document.pdf'
```
This happens when paperless does not have permission to delete files
## Share Links
-Paperless-ngx added the abiltiy to create shareable links to files in version 2.0. You can find the button for this on the document detail screen.
+Paperless-ngx added the ability to create shareable links to files in version 2.0. You can find the button for this on the document detail screen.
- Share links do not require a user to log in and thus link directly to a file.
- Links are unique and are of the form `{paperless-url}/share/{randomly-generated-slug}`.
in reverse order, since the ADF will have scanned the pages from bottom
to top.
- Returns a status message on succcess, or raises a ConsumerError
+ Returns a status message on success, or raises a ConsumerError
in case of failure.
"""
if settings.FILENAME_FORMAT_REMOVE_NONE:
path = path.replace("/-none-/", "/") # remove empty directories
path = path.replace(" -none-", "") # remove when spaced, with space
- path = path.replace("-none-", "") # remove rest of the occurences
+ path = path.replace("-none-", "") # remove rest of the occurrences
path = path.replace("-none-", "none") # backward compatibility
path = path.strip(os.sep)
polling_interval = settings.CONSUMER_POLLING
if polling_interval == 0: # pragma: no cover
# Only happens if INotify failed to import
- logger.warn("Using polling of 10s, consider settng this")
+ logger.warn("Using polling of 10s, consider setting this")
polling_interval = 10
with ThreadPoolExecutor(max_workers=4) as pool:
)
trigger_matched = False
- # Document correpondent vs trigger has_correspondent
+ # Document correspondent vs trigger has_correspondent
if (
trigger.filter_has_correspondent is not None
and document.correspondent != trigger.filter_has_correspondent
(5, "Fuzzy Match"),
],
default=1,
- help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
+ help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
(5, "Fuzzy Match"),
],
default=1,
- help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
+ help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
]
(6, "Automatic Classification"),
],
default=1,
- help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
+ help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
("is_insensitive", models.BooleanField(default=True)),
(6, "Automatic Classification"),
],
default=1,
- help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
+ help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
(6, "Automatic Classification"),
],
default=1,
- help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
+ help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
return
else:
# This is mostly here for the tika parser in docker
- # environemnts. The servers for parsing need to come up first,
+ # environments. The servers for parsing need to come up first,
# and the docker setup doesn't ensure that tika is running
# before attempting migrations.
logger.error("Parse error, will try again in 5 seconds...")
if doc_id not in target_doc_ids:
self.remove_doclink(document, field, doc_id)
- # Create an instance if target doc doesnt have this field or append it to an existing one
+ # Create an instance if target doc doesn't have this field or append it to an existing one
existing_custom_field_instances = {
custom_field.document_id: custom_field
for custom_field in CustomFieldInstance.objects.filter(
).count()
== 0
):
- # can be triggered on existing docs, so only add the field if it doesnt already exist
+ # can be triggered on existing docs, so only add the field if it doesn't already exist
CustomFieldInstance.objects.create(
field=field,
document=document,
GIVEN:
- NUMBER_OF_SUGGESTED_DATES = 0 (disables feature)
WHEN:
- - API reuqest for document suggestions
+ - API request for document suggestions
THEN:
- Don't check for suggested dates at all
"""
GIVEN:
- A document with a single note
WHEN:
- - API reuqest for document notes is made
+ - API request for document notes is made
THEN:
- The associated note is returned
"""
with AsyncWriter(index.open_index()) as writer:
# Note to future self: there is a reason we don't use a model signal handler to update the index: some operations edit many documents at once
# (retagger, renamer) and we don't want to open a writer for each of these, but rather perform the entire operation with one writer.
- # That's why we cant open the writer in a model on_save handler or something.
+ # That's why we can't open the writer in a model on_save handler or something.
index.update_document(writer, d1)
index.update_document(writer, d2)
index.update_document(writer, d3)
GIVEN:
- Documents with owners set & without
WHEN:
- - API reuqest for advanced query (search) is made by non-superuser
- - API reuqest for advanced query (search) is made by superuser
+ - API request for advanced query (search) is made by non-superuser
+ - API request for advanced query (search) is made by superuser
THEN:
- Only owned docs are returned for regular users
- All docs are returned for superuser
GIVEN:
- Documents with granted view permissions to others
WHEN:
- - API reuqest for advanced query (search) is made by user
+ - API request for advanced query (search) is made by user
THEN:
- Only docs with granted view permissions are returned
"""
def test_multiple_dates(self):
text = """This text has multiple dates.
- For example 02.02.2018, 22 July 2022 and Dezember 2021.
- But not 24-12-9999 because its in the future..."""
+ For example 02.02.2018, 22 July 2022 and December 2021.
+ But not 24-12-9999 because it's in the future..."""
dates = list(parse_date_generator("", text))
self.assertEqual(len(dates), 3)
self.assertEqual(
)
def test_get_permission_criteria(self):
- # tests contains touples of user instances and the expected filter
+ # tests contains tuples of user instances and the expected filter
tests = (
(None, [query.Term("has_owner", False)]),
(User(42, username="foo", is_superuser=True), []),
)
def test_tags_query_filters(self):
- # tests contains touples of query_parameter dics and the expected whoosh query
+ # tests contains tuples of query_parameter dicts and the expected whoosh query
param = "tags"
field, _ = DelayedQuery.param_map[param]
tests = (
):
"""
GIVEN:
- - Encrytped document exists with existing encrypted WebP thumbnail path
+ - Encrypted document exists with existing encrypted WebP thumbnail path
WHEN:
- Migration is attempted
THEN:
return [
Warning(
"DEBUG mode is enabled. Disable Debug mode. This is a serious "
- "security issue, since it puts security overides in place which "
- "are meant to be only used during development. This "
+ "security issue, since it puts security overrides in place "
+ "which are meant to be only used during development. This "
"also means that paperless will tell anyone various "
"debugging information when something goes wrong.",
),
# Disable Django's artificial limit on the number of form fields to submit at
# once. This is a protection against overloading the server, but since this is
-# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
-# of log entries outweight the benefits of such a safeguard.
+# a self-hosted sort of gig, the benefits of being able to mass-delete a ton
+# of log entries outweigh the benefits of such a safeguard.
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
message=message,
)
else:
- # No files to consume, just mark as processed if it wasnt by .eml processing
+ # No files to consume, just mark as processed if it wasn't by .eml processing
if not ProcessedMail.objects.filter(
rule=rule,
uid=message.uid,