Paperless provides a wide range of customizations. Depending on how you
run paperless, these settings have to be defined in different places.
+Certain configuration options may be set via the UI. This currently includes
+common [OCR](#ocr) related settings. If set, these will take precedence over the
+settings defined via environment variables. If not set, the environment setting or
+applicable default will be used instead.
+
- If you run paperless on docker, `paperless.conf` is not used.
Rather, configure paperless by copying necessary options to
`docker-compose.env`.
import { MailComponent } from './components/manage/mail/mail.component'
import { UsersAndGroupsComponent } from './components/admin/users-groups/users-groups.component'
import { CustomFieldsComponent } from './components/manage/custom-fields/custom-fields.component'
+import { ConfigComponent } from './components/admin/config/config.component'
export const routes: Routes = [
{ path: '', redirectTo: 'dashboard', pathMatch: 'full' },
},
},
},
+ {
+ path: 'config',
+ component: ConfigComponent,
+ canActivate: [PermissionsGuard],
+ data: {
+ requiredPermission: {
+ action: PermissionAction.View,
+ type: PermissionType.Admin,
+ },
+ },
+ },
{
path: 'tasks',
component: TasksComponent,
import { PdfViewerComponent } from './components/common/pdf-viewer/pdf-viewer.component'
import { DocumentLinkComponent } from './components/common/input/document-link/document-link.component'
import { PreviewPopupComponent } from './components/common/preview-popup/preview-popup.component'
+import { ConfigComponent } from './components/admin/config/config.component'
+import { SwitchComponent } from './components/common/input/switch/switch.component'
import localeAf from '@angular/common/locales/af'
import localeAr from '@angular/common/locales/ar'
PdfViewerComponent,
DocumentLinkComponent,
PreviewPopupComponent,
+ ConfigComponent,
+ SwitchComponent,
],
imports: [
BrowserModule,
--- /dev/null
+<pngx-page-header title="Configuration" i18n-title></pngx-page-header>
+
+<form [formGroup]="configForm" (ngSubmit)="saveConfig()" class="pb-4">
+
+ <ul ngbNav #nav="ngbNav" class="nav-tabs">
+ @for (category of optionCategories; track category) {
+ <li [ngbNavItem]="category">
+ <a ngbNavLink i18n>{{category}}</a>
+ <ng-template ngbNavContent>
+ <div class="p-3">
+ <div class="row row-cols-1 row-cols-md-2 row-cols-lg-3 g-2">
+ @for (option of getCategoryOptions(category); track option.key) {
+ <div class="col">
+ <div class="card bg-light">
+ <div class="card-body">
+ <div class="card-title">
+ <h6>
+ {{option.title}}
+ <a class="btn btn-sm btn-link" title="Read the documentation about this setting" i18n-title [href]="getDocsUrl(option.config_key)" target="_blank" referrerpolicy="no-referrer">
+ <svg class="sidebaricon" fill="currentColor">
+ <use xlink:href="assets/bootstrap-icons.svg#info-circle"/>
+ </svg>
+ </a>
+ </h6>
+ </div>
+ <div class="mb-n3">
+ @switch (option.type) {
+ @case (ConfigOptionType.Select) { <pngx-input-select [formControlName]="option.key" [error]="errors[option.key]" [items]="option.choices" [allowNull]="true"></pngx-input-select> }
+ @case (ConfigOptionType.Number) { <pngx-input-number [formControlName]="option.key" [error]="errors[option.key]" [showAdd]="false"></pngx-input-number> }
+ @case (ConfigOptionType.Boolean) { <pngx-input-switch [formControlName]="option.key" [error]="errors[option.key]" title="Enable" i18n-title></pngx-input-switch> }
+ @case (ConfigOptionType.String) { <pngx-input-text [formControlName]="option.key" [error]="errors[option.key]"></pngx-input-text> }
+ @case (ConfigOptionType.JSON) { <pngx-input-text [formControlName]="option.key" [error]="errors[option.key]"></pngx-input-text> }
+ }
+ </div>
+ </div>
+ </div>
+ </div>
+ }
+ </div>
+ </div>
+ </ng-template>
+ </li>
+ }
+ </ul>
+ <div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
+ <div class="btn-toolbar" role="toolbar">
+ <div class="btn-group me-2">
+ <button type="button" (click)="discardChanges()" class="btn btn-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
+ </div>
+ <div class="btn-group">
+ <button type="submit" class="btn btn-primary" [disabled]="loading || !configForm.valid || (isDirty$ | async) === false" i18n>Save</button>
+ </div>
+ </div>
+</form>
--- /dev/null
+import { ComponentFixture, TestBed } from '@angular/core/testing'
+
+import { ConfigComponent } from './config.component'
+import { ConfigService } from 'src/app/services/config.service'
+import { ToastService } from 'src/app/services/toast.service'
+import { of, throwError } from 'rxjs'
+import { OutputTypeConfig } from 'src/app/data/paperless-config'
+import { HttpClientTestingModule } from '@angular/common/http/testing'
+import { BrowserModule } from '@angular/platform-browser'
+import { NgbModule } from '@ng-bootstrap/ng-bootstrap'
+import { NgSelectModule } from '@ng-select/ng-select'
+import { TextComponent } from '../../common/input/text/text.component'
+import { NumberComponent } from '../../common/input/number/number.component'
+import { SwitchComponent } from '../../common/input/switch/switch.component'
+import { FormsModule, ReactiveFormsModule } from '@angular/forms'
+import { PageHeaderComponent } from '../../common/page-header/page-header.component'
+import { SelectComponent } from '../../common/input/select/select.component'
+
+// Unit tests for ConfigComponent: loading, saving and discarding the
+// configuration form, plus validation of JSON-typed options.
+describe('ConfigComponent', () => {
+ let component: ConfigComponent
+ let fixture: ComponentFixture<ConfigComponent>
+ let configService: ConfigService
+ let toastService: ToastService
+
+ beforeEach(async () => {
+ // declare the component together with every input component its template uses
+ await TestBed.configureTestingModule({
+ declarations: [
+ ConfigComponent,
+ TextComponent,
+ SelectComponent,
+ NumberComponent,
+ SwitchComponent,
+ PageHeaderComponent,
+ ],
+ imports: [
+ HttpClientTestingModule,
+ BrowserModule,
+ NgbModule,
+ NgSelectModule,
+ FormsModule,
+ ReactiveFormsModule,
+ ],
+ }).compileComponents()
+
+ configService = TestBed.inject(ConfigService)
+ toastService = TestBed.inject(ToastService)
+ fixture = TestBed.createComponent(ConfigComponent)
+ component = fixture.componentInstance
+ fixture.detectChanges()
+ })
+
+ it('should load config on init, show error if necessary', () => {
+ const getSpy = jest.spyOn(configService, 'getConfig')
+ const errorSpy = jest.spyOn(toastService, 'showError')
+ // first load attempt fails: an error toast is expected
+ getSpy.mockReturnValueOnce(
+ throwError(() => new Error('Error getting config'))
+ )
+ component.ngOnInit()
+ expect(getSpy).toHaveBeenCalled()
+ expect(errorSpy).toHaveBeenCalled()
+ // second load attempt succeeds: the returned config becomes initialConfig
+ getSpy.mockReturnValueOnce(
+ of({ output_type: OutputTypeConfig.PDF_A } as any)
+ )
+ component.ngOnInit()
+ expect(component.initialConfig).toEqual({
+ output_type: OutputTypeConfig.PDF_A,
+ })
+ })
+
+ it('should save config, show error if necessary', () => {
+ const saveSpy = jest.spyOn(configService, 'saveConfig')
+ const errorSpy = jest.spyOn(toastService, 'showError')
+ // first save fails: an error toast is expected
+ saveSpy.mockReturnValueOnce(
+ throwError(() => new Error('Error saving config'))
+ )
+ component.saveConfig()
+ expect(saveSpy).toHaveBeenCalled()
+ expect(errorSpy).toHaveBeenCalled()
+ // second save succeeds: the response replaces initialConfig
+ saveSpy.mockReturnValueOnce(
+ of({ output_type: OutputTypeConfig.PDF_A } as any)
+ )
+ component.saveConfig()
+ expect(component.initialConfig).toEqual({
+ output_type: OutputTypeConfig.PDF_A,
+ })
+ })
+
+ it('should support discard changes', () => {
+ // discarding resets the form back to the values held in initialConfig
+ component.initialConfig = { output_type: OutputTypeConfig.PDF_A2 } as any
+ component.configForm.patchValue({ output_type: OutputTypeConfig.PDF_A })
+ component.discardChanges()
+ expect(component.configForm.get('output_type').value).toEqual(
+ OutputTypeConfig.PDF_A2
+ )
+ })
+
+ it('should support JSON validation for e.g. user_args', () => {
+ // the component records a message in `errors` while the control is INVALID
+ // and clears it (null) once the text parses as JSON again
+ component.configForm.patchValue({ user_args: '{ foo bar }' })
+ expect(component.errors).toEqual({ user_args: 'Invalid JSON' })
+ component.configForm.patchValue({ user_args: '{ "foo": "bar" }' })
+ expect(component.errors).toEqual({ user_args: null })
+ })
+})
--- /dev/null
+import { Component, OnDestroy, OnInit } from '@angular/core'
+import { AbstractControl, FormControl, FormGroup } from '@angular/forms'
+import {
+ BehaviorSubject,
+ Observable,
+ Subject,
+ Subscription,
+ first,
+ takeUntil,
+} from 'rxjs'
+import {
+ PaperlessConfigOptions,
+ ConfigCategory,
+ ConfigOption,
+ ConfigOptionType,
+ PaperlessConfig,
+} from 'src/app/data/paperless-config'
+import { ConfigService } from 'src/app/services/config.service'
+import { ToastService } from 'src/app/services/toast.service'
+import { ComponentWithPermissions } from '../../with-permissions/with-permissions.component'
+import { DirtyComponent, dirtyCheck } from '@ngneat/dirty-check-forms'
+
+/**
+ * Admin view for editing the application configuration.
+ *
+ * One form control is created per entry in `PaperlessConfigOptions` (plus
+ * `id`). Values are loaded and saved through `ConfigService`, and dirty state
+ * is derived with `dirtyCheck` against the last loaded / saved configuration.
+ */
+@Component({
+  selector: 'pngx-config',
+  templateUrl: './config.component.html',
+  styleUrl: './config.component.scss',
+})
+export class ConfigComponent
+  extends ComponentWithPermissions
+  implements OnInit, OnDestroy, DirtyComponent
+{
+  // exposed so the template can switch on the option type
+  public readonly ConfigOptionType = ConfigOptionType
+
+  // generated dynamically
+  public configForm = new FormGroup({})
+
+  // error message per option key, bound to the `error` input of each field
+  public errors = {}
+
+  /** Category labels, one tab is rendered per entry. */
+  get optionCategories(): string[] {
+    return Object.values(ConfigCategory)
+  }
+
+  /** Options that belong to the given category label. */
+  getCategoryOptions(category: string): ConfigOption[] {
+    return PaperlessConfigOptions.filter((o) => o.category === category)
+  }
+
+  public loading: boolean = false
+
+  initialConfig: PaperlessConfig
+  store: BehaviorSubject<any>
+  storeSub: Subscription
+  isDirty$: Observable<boolean>
+
+  // emits on destroy to tear down every subscription made by this component
+  private unsubscribeNotifier: Subject<any> = new Subject()
+
+  constructor(
+    private configService: ConfigService,
+    private toastService: ToastService
+  ) {
+    super()
+    this.configForm.addControl('id', new FormControl())
+    PaperlessConfigOptions.forEach((option) => {
+      this.configForm.addControl(option.key, new FormControl())
+    })
+  }
+
+  /** Loads the current configuration and wires up JSON validation. */
+  ngOnInit(): void {
+    this.loading = true
+    this.configService
+      .getConfig()
+      .pipe(takeUntil(this.unsubscribeNotifier))
+      .subscribe({
+        next: (config) => {
+          this.loading = false
+          this.initialize(config)
+        },
+        error: (e) => {
+          this.loading = false
+          this.toastService.showError($localize`Error retrieving config`, e)
+        },
+      })
+
+    // validate JSON inputs
+    PaperlessConfigOptions.filter(
+      (o) => o.type === ConfigOptionType.JSON
+    ).forEach((option) => {
+      const jsonControl = this.configForm.get(option.key)
+      jsonControl.addValidators((control: AbstractControl) => {
+        // empty values are allowed, only non-empty text has to parse
+        if (!control.value || control.value.toString().length === 0)
+          return null
+        try {
+          JSON.parse(control.value)
+        } catch (e) {
+          // validators must return an error map (not an array); key it by
+          // the option being validated instead of a hard-coded name
+          return { [option.key]: e }
+        }
+        return null
+      })
+      jsonControl.statusChanges
+        // stop updating `errors` once the component is destroyed
+        .pipe(takeUntil(this.unsubscribeNotifier))
+        .subscribe((status) => {
+          this.errors[option.key] =
+            status === 'INVALID' ? $localize`Invalid JSON` : null
+        })
+      jsonControl.updateValueAndValidity()
+    })
+  }
+
+  ngOnDestroy(): void {
+    this.unsubscribeNotifier.next(true)
+    this.unsubscribeNotifier.complete()
+  }
+
+  /**
+   * Applies a configuration received from the backend to the form and
+   * remembers it as the baseline for dirty checking and discarding.
+   */
+  private initialize(config: PaperlessConfig) {
+    if (!this.store) {
+      // created once, on the first configuration that arrives
+      this.store = new BehaviorSubject(config)
+
+      this.store
+        .asObservable()
+        .pipe(takeUntil(this.unsubscribeNotifier))
+        .subscribe((state) => {
+          this.configForm.patchValue(state, { emitEvent: false })
+        })
+
+      this.isDirty$ = dirtyCheck(this.configForm, this.store.asObservable())
+    }
+    this.configForm.patchValue(config)
+
+    this.initialConfig = config
+  }
+
+  /** Link to the documentation anchor of the given setting. */
+  getDocsUrl(key: string) {
+    return `https://docs.paperless-ngx.com/configuration/#${key}`
+  }
+
+  /** Sends the current form value to the backend and re-baselines on success. */
+  public saveConfig() {
+    this.loading = true
+    this.configService
+      .saveConfig(this.configForm.value as PaperlessConfig)
+      // takeUntil goes last: with first() after it, destroying the component
+      // mid-request completes the stream without a value and first() turns
+      // that into an EmptyError, i.e. a bogus error toast
+      .pipe(first(), takeUntil(this.unsubscribeNotifier))
+      .subscribe({
+        next: (config) => {
+          this.loading = false
+          this.initialize(config)
+          this.store.next(config)
+          this.toastService.showInfo($localize`Configuration updated`)
+        },
+        error: (e) => {
+          this.loading = false
+          this.toastService.showError(
+            $localize`An error occurred updating configuration`,
+            e
+          )
+        },
+      })
+  }
+
+  /** Resets the form to the last loaded / saved configuration. */
+  public discardChanges() {
+    this.configForm.reset(this.initialConfig)
+  }
+}
</svg><span> <ng-container i18n>Settings</ng-container></span>
</a>
</li>
+ <li class="nav-item" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Admin }">
+ <a class="nav-link" routerLink="config" routerLinkActive="active" (click)="closeMenu()"
+ ngbPopover="Configuration" i18n-ngbPopover [disablePopover]="!slimSidebarEnabled" placement="end"
+ container="body" triggers="mouseenter:mouseleave" popoverClass="popover-slim">
+ <svg class="sidebaricon" fill="currentColor">
+ <use xlink:href="assets/bootstrap-icons.svg#sliders2-vertical" />
+ </svg><span> <ng-container i18n>Configuration</ng-container></span>
+ </a>
+ </li>
<li class="nav-item" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.User }">
<a class="nav-link" routerLink="usersgroups" routerLinkActive="active" (click)="closeMenu()"
ngbPopover="Users & Groups" i18n-ngbPopover [disablePopover]="!slimSidebarEnabled" placement="end"
<div class="mb-3" [class.pb-3]="error">
<div class="row">
<div class="d-flex align-items-center position-relative hidden-button-container" [class.col-md-3]="horizontal">
- <label class="form-label" [class.mb-md-0]="horizontal" [for]="inputId">{{title}}</label>
+ @if (title) {
+ <label class="form-label" [class.mb-md-0]="horizontal" [for]="inputId">{{title}}</label>
+ }
@if (removable) {
<button type="button" class="btn btn-sm btn-danger position-absolute left-0" (click)="removed.emit(this)">
<svg class="sidebaricon" fill="currentColor">
--- /dev/null
+<div class="mb-3">
+ <div class="row">
+ @if (horizontal) {
+ <div class="d-flex align-items-center position-relative hidden-button-container col-md-3">
+ <label class="form-label" [class.mb-md-0]="horizontal" [for]="inputId">{{title}}</label>
+ @if (removable) {
+ <button type="button" class="btn btn-sm btn-danger position-absolute left-0" (click)="removed.emit(this)">
+ <svg class="sidebaricon" fill="currentColor">
+ <use xlink:href="assets/bootstrap-icons.svg#x"/>
+ </svg> <ng-container i18n>Remove</ng-container>
+ </button>
+ }
+ </div>
+ }
+ <div [ngClass]="{'col-md-9': horizontal, 'align-items-center': horizontal, 'd-flex': horizontal}">
+ <div class="form-check form-switch">
+ <input #inputField type="checkbox" class="form-check-input" [id]="inputId" [(ngModel)]="value" (change)="onChange(value)" (blur)="onTouched()" [disabled]="disabled">
+ @if (!horizontal) {
+ <label class="form-check-label" [for]="inputId">{{title}}</label>
+ }
+ @if (hint) {
+ <div class="form-text text-muted">{{hint}}</div>
+ }
+ </div>
+ </div>
+ </div>
+ </div>
--- /dev/null
+import { ComponentFixture, TestBed } from '@angular/core/testing'
+import { SwitchComponent } from './switch.component'
+import {
+ FormsModule,
+ NG_VALUE_ACCESSOR,
+ ReactiveFormsModule,
+} from '@angular/forms'
+
+// Unit tests for SwitchComponent: toggling the checkbox updates the value.
+describe('SwitchComponent', () => {
+  let component: SwitchComponent
+  let fixture: ComponentFixture<SwitchComponent>
+  let input: HTMLInputElement
+
+  beforeEach(async () => {
+    // compileComponents() is asynchronous; wait for it before creating the
+    // fixture so the external template is guaranteed to be compiled
+    await TestBed.configureTestingModule({
+      declarations: [SwitchComponent],
+      providers: [],
+      imports: [FormsModule, ReactiveFormsModule],
+    }).compileComponents()
+
+    fixture = TestBed.createComponent(SwitchComponent)
+    fixture.debugElement.injector.get(NG_VALUE_ACCESSOR)
+    component = fixture.componentInstance
+    fixture.detectChanges()
+    input = component.inputField.nativeElement
+  })
+
+  it('should support use of checkbox', () => {
+    input.checked = true
+    input.dispatchEvent(new Event('change'))
+    fixture.detectChanges()
+    expect(component.value).toBeTruthy()
+
+    input.checked = false
+    input.dispatchEvent(new Event('change'))
+    fixture.detectChanges()
+    expect(component.value).toBeFalsy()
+  })
+})
--- /dev/null
+import { Component, forwardRef } from '@angular/core'
+import { NG_VALUE_ACCESSOR } from '@angular/forms'
+import { AbstractInputComponent } from '../abstract-input'
+
+/**
+ * Boolean form input rendered as a switch. Registers itself as a
+ * `NG_VALUE_ACCESSOR` so it can be bound with `formControlName` / `ngModel`;
+ * all behaviour is inherited from `AbstractInputComponent<boolean>`.
+ */
+@Component({
+  selector: 'pngx-input-switch',
+  templateUrl: './switch.component.html',
+  styleUrls: ['./switch.component.scss'],
+  providers: [
+    {
+      provide: NG_VALUE_ACCESSOR,
+      // forwardRef: the class is referenced before its declaration completes
+      useExisting: forwardRef(() => SwitchComponent),
+      multi: true,
+    },
+  ],
+})
+export class SwitchComponent extends AbstractInputComponent<boolean> {
+  constructor() {
+    super()
+  }
+}
<div class="mb-3" [class.pb-3]="error">
<div class="row">
<div class="d-flex align-items-center position-relative hidden-button-container" [class.col-md-3]="horizontal">
- <label class="form-label" [class.mb-md-0]="horizontal" [for]="inputId">{{title}}</label>
+ @if (title) {
+ <label class="form-label" [class.mb-md-0]="horizontal" [for]="inputId">{{title}}</label>
+ }
@if (removable) {
<button type="button" class="btn btn-sm btn-danger position-absolute left-0" (click)="removed.emit(this)">
<svg class="sidebaricon" fill="currentColor">
--- /dev/null
+import { ObjectWithId } from './object-with-id'
+
+// see /src/paperless/models.py
+
+// Choices offered for the `output_type` option (PAPERLESS_OCR_OUTPUT_TYPE).
+export enum OutputTypeConfig {
+ PDF = 'pdf',
+ PDF_A = 'pdfa',
+ PDF_A1 = 'pdfa-1',
+ PDF_A2 = 'pdfa-2',
+ PDF_A3 = 'pdfa-3',
+}
+
+// Choices offered for the `mode` option (PAPERLESS_OCR_MODE).
+export enum ModeConfig {
+ SKIP = 'skip',
+ REDO = 'redo',
+ FORCE = 'force',
+ SKIP_NO_ARCHIVE = 'skip_noarchive',
+}
+
+// Choices offered for the `skip_archive_file` option
+// (PAPERLESS_OCR_SKIP_ARCHIVE_FILE).
+export enum ArchiveFileConfig {
+ NEVER = 'never',
+ WITH_TEXT = 'with_text',
+ ALWAYS = 'always',
+}
+
+// Choices offered for the `unpaper_clean` option (PAPERLESS_OCR_CLEAN).
+export enum CleanConfig {
+ CLEAN = 'clean',
+ FINAL = 'clean-final',
+ NONE = 'none',
+}
+
+// Choices offered for the `color_conversion_strategy` option
+// (PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY).
+export enum ColorConvertConfig {
+ UNCHANGED = 'LeaveColorUnchanged',
+ RGB = 'RGB',
+ INDEPENDENT = 'UseDeviceIndependentColor',
+ GRAY = 'Gray',
+ CMYK = 'CMYK',
+}
+
+// Kind of value an option holds; the config form picks its input widget
+// based on this.
+export enum ConfigOptionType {
+ String = 'string',
+ Number = 'number',
+ Select = 'select',
+ Boolean = 'boolean',
+ JSON = 'json',
+}
+
+// Localized category labels; every ConfigOption references one of these
+// values in its `category` field.
+export const ConfigCategory = {
+ OCR: $localize`OCR Settings`,
+}
+
+// Describes one editable configuration setting.
+export interface ConfigOption {
+ // property name in PaperlessConfig, also used as the form control name
+ key: string
+ // localized label
+ title: string
+ // kind of value held by the option
+ type: ConfigOptionType
+ // selectable items, set on the entries of type Select
+ choices?: Array<{ id: string; name: string }>
+ // name of the matching environment variable (e.g. PAPERLESS_OCR_MODE)
+ config_key?: string
+ // one of the ConfigCategory values
+ category: string
+}
+
+/**
+ * Turns a string enum (or any string-valued map) into select items.
+ *
+ * @param enumObj object whose values are the option values
+ * @returns one `{ id, name }` item per value, in declaration order; both
+ *   fields carry the raw value
+ */
+function mapToItems(
+  enumObj: Record<string, string>
+): Array<{ id: string; name: string }> {
+  return Object.values(enumObj).map((value) => ({ id: value, name: value }))
+}
+
+/**
+ * Every setting that can be edited in the configuration UI. `key` matches the
+ * property name in `PaperlessConfig`; `config_key` is the name of the
+ * corresponding environment variable and is used to link to its documentation.
+ */
+export const PaperlessConfigOptions: ConfigOption[] = [
+  {
+    key: 'output_type',
+    title: $localize`Output Type`,
+    type: ConfigOptionType.Select,
+    choices: mapToItems(OutputTypeConfig),
+    config_key: 'PAPERLESS_OCR_OUTPUT_TYPE',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'language',
+    title: $localize`Language`,
+    type: ConfigOptionType.String,
+    config_key: 'PAPERLESS_OCR_LANGUAGE',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'pages',
+    title: $localize`Pages`,
+    type: ConfigOptionType.Number,
+    config_key: 'PAPERLESS_OCR_PAGES',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'mode',
+    title: $localize`Mode`,
+    type: ConfigOptionType.Select,
+    choices: mapToItems(ModeConfig),
+    config_key: 'PAPERLESS_OCR_MODE',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'skip_archive_file',
+    title: $localize`Skip Archive File`,
+    type: ConfigOptionType.Select,
+    choices: mapToItems(ArchiveFileConfig),
+    config_key: 'PAPERLESS_OCR_SKIP_ARCHIVE_FILE',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'image_dpi',
+    title: $localize`Image DPI`,
+    type: ConfigOptionType.Number,
+    config_key: 'PAPERLESS_OCR_IMAGE_DPI',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'unpaper_clean',
+    title: $localize`Clean`,
+    type: ConfigOptionType.Select,
+    choices: mapToItems(CleanConfig),
+    config_key: 'PAPERLESS_OCR_CLEAN',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'deskew',
+    title: $localize`Deskew`,
+    type: ConfigOptionType.Boolean,
+    config_key: 'PAPERLESS_OCR_DESKEW',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'rotate_pages',
+    title: $localize`Rotate Pages`,
+    type: ConfigOptionType.Boolean,
+    config_key: 'PAPERLESS_OCR_ROTATE_PAGES',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'rotate_pages_threshold',
+    title: $localize`Rotate Pages Threshold`,
+    type: ConfigOptionType.Number,
+    config_key: 'PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'max_image_pixels',
+    title: $localize`Max Image Pixels`,
+    type: ConfigOptionType.Number,
+    // own setting, not the image DPI one (the docs link is built from this)
+    config_key: 'PAPERLESS_OCR_MAX_IMAGE_PIXELS',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'color_conversion_strategy',
+    title: $localize`Color Conversion Strategy`,
+    type: ConfigOptionType.Select,
+    choices: mapToItems(ColorConvertConfig),
+    config_key: 'PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY',
+    category: ConfigCategory.OCR,
+  },
+  {
+    key: 'user_args',
+    title: $localize`OCR Arguments`,
+    type: ConfigOptionType.JSON,
+    config_key: 'PAPERLESS_OCR_USER_ARGS',
+    category: ConfigCategory.OCR,
+  },
+]
+
+// Configuration object as exchanged with the backend. Property names match
+// the `key` of the entries in PaperlessConfigOptions.
+export interface PaperlessConfig extends ObjectWithId {
+ output_type: OutputTypeConfig
+ pages: number
+ language: string
+ mode: ModeConfig
+ skip_archive_file: ArchiveFileConfig
+ image_dpi: number
+ unpaper_clean: CleanConfig
+ deskew: boolean
+ rotate_pages: boolean
+ rotate_pages_threshold: number
+ max_image_pixels: number
+ color_conversion_strategy: ColorConvertConfig
+ user_args: object
+}
--- /dev/null
+import { TestBed } from '@angular/core/testing'
+
+import { ConfigService } from './config.service'
+import {
+ HttpClientTestingModule,
+ HttpTestingController,
+} from '@angular/common/http/testing'
+import { environment } from 'src/environments/environment'
+import { OutputTypeConfig, PaperlessConfig } from '../data/paperless-config'
+
+// Unit tests for ConfigService: verifies URL and HTTP method of each call.
+describe('ConfigService', () => {
+  let service: ConfigService
+  let httpTestingController: HttpTestingController
+
+  beforeEach(() => {
+    TestBed.configureTestingModule({
+      imports: [HttpClientTestingModule],
+    })
+    service = TestBed.inject(ConfigService)
+    httpTestingController = TestBed.inject(HttpTestingController)
+  })
+
+  afterEach(() => {
+    // fail the test if it triggered a request that was never expected
+    httpTestingController.verify()
+  })
+
+  it('should call correct API endpoint on get config', () => {
+    service.getConfig().subscribe()
+    const req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}config/`
+    )
+    expect(req.request.method).toEqual('GET')
+    // the endpoint answers with a list, the service picks its first entry
+    req.flush([{}])
+  })
+
+  it('should call correct API endpoint on set config', () => {
+    service
+      .saveConfig({
+        id: 1,
+        output_type: OutputTypeConfig.PDF_A,
+      } as PaperlessConfig)
+      .subscribe()
+    const req = httpTestingController.expectOne(
+      `${environment.apiBaseUrl}config/1/`
+    )
+    expect(req.request.method).toEqual('PATCH')
+  })
+})
--- /dev/null
+import { HttpClient } from '@angular/common/http'
+import { Injectable } from '@angular/core'
+import { Observable, first, map } from 'rxjs'
+import { environment } from 'src/environments/environment'
+import { PaperlessConfig } from '../data/paperless-config'
+
+/**
+ * HTTP client for the `config/` API endpoint.
+ */
+@Injectable({
+  providedIn: 'root',
+})
+export class ConfigService {
+  protected baseUrl: string = `${environment.apiBaseUrl}config/`
+
+  constructor(protected http: HttpClient) {}
+
+  /**
+   * Requests the configuration list and emits its first entry.
+   */
+  getConfig(): Observable<PaperlessConfig> {
+    const response$ = this.http.get<[PaperlessConfig]>(this.baseUrl)
+    return response$.pipe(
+      first(),
+      map((results) => results[0])
+    )
+  }
+
+  /**
+   * PATCHes the given configuration to `config/<id>/` and emits the response.
+   */
+  saveConfig(config: PaperlessConfig): Observable<PaperlessConfig> {
+    const url = `${this.baseUrl}${config.id}/`
+    const response$ = this.http.patch<PaperlessConfig>(url, config)
+    return response$.pipe(first())
+  }
+}
except OSError:
logger.exception("IO error while loading document classification model")
classifier = None
- except Exception: # pragma: nocover
+ except Exception: # pragma: no cover
logger.exception("Unknown error while loading document classification model")
classifier = None
return True
- def preprocess_content(self, content: str) -> str: # pragma: nocover
+ def preprocess_content(self, content: str) -> str: # pragma: no cover
"""
Process to contents of a document, distilling it down into
words which are meaningful to the content
document_parser: DocumentParser = parser_class(
self.logging_group,
- progress_callback,
+ progress_callback=progress_callback,
)
self.log.debug(f"Parser: {type(document_parser).__name__}")
try:
from inotifyrecursive import INotify
from inotifyrecursive import flags
-except ImportError: # pragma: nocover
+except ImportError: # pragma: no cover
INotify = flags = None
logger = logging.getLogger("paperless.management.consumer")
from documents.utils import copy_file_with_basic_stats
from paperless import version
from paperless.db import GnuPG
+from paperless.models import ApplicationConfiguration
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
serializers.serialize("json", CustomField.objects.all()),
)
+ manifest += json.loads(
+ serializers.serialize("json", ApplicationConfiguration.objects.all()),
+ )
+
# These are treated specially and included in the per-document manifest
# if that setting is enabled. Otherwise, they are just exported to the bulk
# manifest
# This class is used to migrate data between databases
# That's difficult to test
-class Command(LoadDataCommand): # pragma: nocover
+class Command(LoadDataCommand): # pragma: no cover
"""
Allow the loading of data from standard in. Sourced originally from:
https://gist.github.com/bmispelon/ad5a2c333443b3a1d051 (MIT licensed)
if not options:
return None
+ best_parser = sorted(options, key=lambda _: _["weight"], reverse=True)[0]
+
# Return the parser with the highest weight.
- return sorted(options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
+ return best_parser["parser"]
def run_convert(
def __init__(self, logging_group, progress_callback=None):
super().__init__()
self.logging_group = logging_group
+ self.settings = self.get_settings()
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
if self.progress_callback:
self.progress_callback(current_progress, max_progress)
+ def get_settings(self): # pragma: no cover
+ """
+ Return the settings for this parser.
+
+ A parser must implement this; this implementation only raises
+ NotImplementedError.
+ """
+ raise NotImplementedError
+
def read_file_handle_unicode_errors(self, filepath: Path) -> str:
"""
Helper utility for reading from a file, and handling a problem with its
self.assertEqual(info.title, "anotherall")
-class DummyParser(DocumentParser):
+class _BaseTestParser(DocumentParser):
+ # Shared base for the parsers defined in this test module; supplies a
+ # get_settings() implementation so they do not each have to.
+ def get_settings(self):
+ """
+ This parser does not implement additional settings yet
+ """
+ return None
+
+
+class DummyParser(_BaseTestParser):
def __init__(self, logging_group, scratch_dir, archive_path):
super().__init__(logging_group, None)
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
self.text = "The Text"
-class CopyParser(DocumentParser):
+class CopyParser(_BaseTestParser):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
shutil.copy(document_path, self.archive_path)
-class FaultyParser(DocumentParser):
+class FaultyParser(_BaseTestParser):
def __init__(self, logging_group, scratch_dir):
super().__init__(logging_group)
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
raise ParseError("Does not compute.")
-class FaultyGenericExceptionParser(DocumentParser):
+class FaultyGenericExceptionParser(_BaseTestParser):
def __init__(self, logging_group, scratch_dir):
super().__init__(logging_group)
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
manifest = self._do_export(use_filename_format=use_filename_format)
- self.assertEqual(len(manifest), 172)
+ self.assertEqual(len(manifest), 178)
# dont include consumer or AnonymousUser users
self.assertEqual(
self.assertEqual(Document.objects.get(id=self.d4.id).title, "wow_dec")
self.assertEqual(GroupObjectPermission.objects.count(), 1)
self.assertEqual(UserObjectPermission.objects.count(), 1)
- self.assertEqual(Permission.objects.count(), 124)
+ self.assertEqual(Permission.objects.count(), 128)
messages = check_sanity()
# everything is alright after the test
self.assertEqual(len(messages), 0)
os.path.join(self.dirs.media_dir, "documents"),
)
- self.assertEqual(ContentType.objects.count(), 31)
- self.assertEqual(Permission.objects.count(), 124)
+ self.assertEqual(ContentType.objects.count(), 32)
+ self.assertEqual(Permission.objects.count(), 128)
manifest = self._do_export()
with paperless_environment():
self.assertEqual(
len(list(filter(lambda e: e["model"] == "auth.permission", manifest))),
- 124,
+ 128,
)
# add 1 more to db to show objects are not re-created by import
Permission.objects.create(
codename="test_perm",
content_type_id=1,
)
- self.assertEqual(Permission.objects.count(), 125)
+ self.assertEqual(Permission.objects.count(), 129)
# will cause an import error
self.user.delete()
with self.assertRaises(IntegrityError):
call_command("document_importer", "--no-progress-bar", self.target)
- self.assertEqual(ContentType.objects.count(), 31)
- self.assertEqual(Permission.objects.count(), 125)
+ self.assertEqual(ContentType.objects.count(), 32)
+ self.assertEqual(Permission.objects.count(), 129)
--- /dev/null
+import dataclasses
+import json
+from typing import Optional
+
+from django.conf import settings
+
+from paperless.models import ApplicationConfiguration
+
+
@dataclasses.dataclass
class OutputTypeConfig:
    """
    Holds the chosen PDF output format, which almost every parser needs
    """

    output_type: str = dataclasses.field(init=False)

    @staticmethod
    def _get_config_instance() -> ApplicationConfiguration:
        """
        Fetch the stored application configuration, creating it first when
        the table holds no row
        """
        manager = ApplicationConfiguration.objects
        instance = manager.all().first()
        if instance is None:
            # Workaround for a test where the migration hasn't run to create the single model
            manager.create()
            instance = manager.all().first()
        return instance

    def __post_init__(self) -> None:
        """
        Resolve the output type: the stored value if one is set, otherwise
        the value from the settings module
        """
        stored = self._get_config_instance().output_type
        self.output_type = stored if stored else settings.OCR_OUTPUT_TYPE
+
+
@dataclasses.dataclass
class OcrConfig(OutputTypeConfig):
    """
    Specific settings for the Tesseract based parser. Options generally
    correspond almost directly to the OCRMyPDF options.

    Every attribute is resolved once, when the object is created: a value
    stored on the ApplicationConfiguration row takes precedence, otherwise
    the matching ``settings.OCR_*`` value is used.
    """

    pages: Optional[int] = dataclasses.field(init=False)
    language: str = dataclasses.field(init=False)
    mode: str = dataclasses.field(init=False)
    skip_archive_file: str = dataclasses.field(init=False)
    image_dpi: Optional[int] = dataclasses.field(init=False)
    clean: str = dataclasses.field(init=False)
    deskew: bool = dataclasses.field(init=False)
    rotate: bool = dataclasses.field(init=False)
    rotate_threshold: float = dataclasses.field(init=False)
    max_image_pixel: Optional[float] = dataclasses.field(init=False)
    color_conversion_strategy: str = dataclasses.field(init=False)
    user_args: Optional[dict[str, str]] = dataclasses.field(init=False)

    @staticmethod
    def _prefer_stored(stored, fallback):
        """
        Return the stored value unless it is None (i.e. not configured).

        Unlike ``stored or fallback`` this keeps legitimate falsy values such
        as False or 0.0.
        """
        return fallback if stored is None else stored

    def __post_init__(self) -> None:
        """
        Populate all OCR attributes from the stored configuration, falling
        back to the settings module for anything which is not set.
        """
        super().__post_init__()

        app_config = self._get_config_instance()

        # A limit below 1 cannot describe a page range. The environment may
        # still supply 0, so report "no limit" instead of letting callers
        # which test `is not None` build a range like "1-0".
        pages = app_config.pages or settings.OCR_PAGES
        self.pages = pages if pages and pages > 0 else None

        # For text fields a blank string means "not set", so `or` is correct
        self.language = app_config.language or settings.OCR_LANGUAGE
        self.mode = app_config.mode or settings.OCR_MODE
        self.skip_archive_file = (
            app_config.skip_archive_file or settings.OCR_SKIP_ARCHIVE_FILE
        )
        self.image_dpi = app_config.image_dpi or settings.OCR_IMAGE_DPI
        self.clean = app_config.unpaper_clean or settings.OCR_CLEAN

        # An explicit False (or a threshold of 0.0) stored in the database
        # must override the settings value, so only None counts as unset
        self.deskew = self._prefer_stored(app_config.deskew, settings.OCR_DESKEW)
        self.rotate = self._prefer_stored(
            app_config.rotate_pages,
            settings.OCR_ROTATE_PAGES,
        )
        self.rotate_threshold = self._prefer_stored(
            app_config.rotate_pages_threshold,
            settings.OCR_ROTATE_PAGES_THRESHOLD,
        )

        self.max_image_pixel = (
            app_config.max_image_pixels or settings.OCR_MAX_IMAGE_PIXELS
        )
        self.color_conversion_strategy = (
            app_config.color_conversion_strategy
            or settings.OCR_COLOR_CONVERSION_STRATEGY
        )

        user_args = None
        if app_config.user_args:
            # Already decoded, the model stores it in a JSON field
            user_args = app_config.user_args
        elif settings.OCR_USER_ARGS is not None:  # pragma: no cover
            # The settings value is raw JSON text; text which does not
            # decode results in no extra arguments
            try:
                user_args = json.loads(settings.OCR_USER_ARGS)
            except json.JSONDecodeError:
                user_args = {}

        self.user_args = user_args
--- /dev/null
+# Generated by Django 4.2.7 on 2023-12-19 17:51
+
+import django.core.validators
+from django.db import migrations
+from django.db import models
+
+
+def _create_singleton(apps, schema_editor):
+ """
+ Creates the first and only instance of the configuration model
+ """
+ settings_model = apps.get_model("paperless", "ApplicationConfiguration")
+ settings_model.objects.create()
+
+
+class Migration(migrations.Migration):
+ initial = True
+
+ dependencies = []
+
+ operations = [
+ migrations.CreateModel(
+ name="ApplicationConfiguration",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ (
+ "output_type",
+ models.CharField(
+ blank=True,
+ choices=[
+ ("pdf", "pdf"),
+ ("pdfa", "pdfa"),
+ ("pdfa-1", "pdfa-1"),
+ ("pdfa-2", "pdfa-2"),
+ ("pdfa-3", "pdfa-3"),
+ ],
+ max_length=8,
+ null=True,
+ verbose_name="Sets the output PDF type",
+ ),
+ ),
+ (
+ "pages",
+ models.PositiveIntegerField(
+ null=True,
+ validators=[
+ django.core.validators.MinValueValidator(1),
+ ],
+ verbose_name="Do OCR from page 1 to this value",
+ ),
+ ),
+ (
+ "language",
+ models.CharField(
+ blank=True,
+ max_length=32,
+ null=True,
+ verbose_name="Do OCR using these languages",
+ ),
+ ),
+ (
+ "mode",
+ models.CharField(
+ blank=True,
+ choices=[
+ ("skip", "skip"),
+ ("redo", "redo"),
+ ("force", "force"),
+ ("skip_noarchive", "skip_noarchive"),
+ ],
+ max_length=16,
+ null=True,
+ verbose_name="Sets the OCR mode",
+ ),
+ ),
+ (
+ "skip_archive_file",
+ models.CharField(
+ blank=True,
+ choices=[
+ ("never", "never"),
+ ("with_text", "with_text"),
+ ("always", "always"),
+ ],
+ max_length=16,
+ null=True,
+ verbose_name="Controls the generation of an archive file",
+ ),
+ ),
+ (
+ "image_dpi",
+ models.PositiveIntegerField(
+ null=True,
+ validators=[
+ django.core.validators.MinValueValidator(1),
+ ],
+ verbose_name="Sets image DPI fallback value",
+ ),
+ ),
+ (
+ "unpaper_clean",
+ models.CharField(
+ blank=True,
+ choices=[
+ ("clean", "clean"),
+ ("clean-final", "clean-final"),
+ ("none", "none"),
+ ],
+ max_length=16,
+ null=True,
+ verbose_name="Controls the unpaper cleaning",
+ ),
+ ),
+ (
+ "deskew",
+ models.BooleanField(null=True, verbose_name="Enables deskew"),
+ ),
+ (
+ "rotate_pages",
+ models.BooleanField(
+ null=True,
+ verbose_name="Enables page rotation",
+ ),
+ ),
+ (
+ "rotate_pages_threshold",
+ models.FloatField(
+ null=True,
+ validators=[django.core.validators.MinValueValidator(0.0)],
+ verbose_name="Sets the threshold for rotation of pages",
+ ),
+ ),
+ (
+ "max_image_pixels",
+ models.FloatField(
+ null=True,
+ validators=[
+ django.core.validators.MinValueValidator(1000000.0),
+ ],
+ verbose_name="Sets the maximum image size for decompression",
+ ),
+ ),
+ (
+ "color_conversion_strategy",
+ models.CharField(
+ blank=True,
+ choices=[
+ ("LeaveColorUnchanged", "LeaveColorUnchanged"),
+ ("RGB", "RGB"),
+ ("UseDeviceIndependentColor", "UseDeviceIndependentColor"),
+ ("Gray", "Gray"),
+ ("CMYK", "CMYK"),
+ ],
+ max_length=32,
+ null=True,
+ verbose_name="Sets the Ghostscript color conversion strategy",
+ ),
+ ),
+ (
+ "user_args",
+ models.JSONField(
+ null=True,
+ verbose_name="Adds additional user arguments for OCRMyPDF",
+ ),
+ ),
+ ],
+ options={
+ "verbose_name": "paperless application settings",
+ },
+ ),
+ migrations.RunPython(_create_singleton, migrations.RunPython.noop),
+ ]
--- /dev/null
+from django.core.validators import MinValueValidator
+from django.db import models
+from django.utils.translation import gettext_lazy as _
+
+DEFAULT_SINGLETON_INSTANCE_ID = 1
+
+
class AbstractSingletonModel(models.Model):
    """
    Abstract base for models which always save under one fixed primary key
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        Always save as the first and only model
        """
        # Overwrite whatever key the instance carried with the fixed one
        self.pk = DEFAULT_SINGLETON_INSTANCE_ID
        super().save(*args, **kwargs)
+
+
class OutputTypeChoices(models.TextChoices):
    """
    The PDF types which may be chosen for the output file.

    Matches to --output-type
    """

    PDF = ("pdf", _("pdf"))
    PDF_A = ("pdfa", _("pdfa"))
    PDF_A1 = ("pdfa-1", _("pdfa-1"))
    PDF_A2 = ("pdfa-2", _("pdfa-2"))
    PDF_A3 = ("pdfa-3", _("pdfa-3"))
+
+
class ModeChoices(models.TextChoices):
    """
    The available OCR modes.

    Matches to --skip-text, --redo-ocr, --force-ocr
    and our own custom setting
    """

    SKIP = ("skip", _("skip"))
    REDO = ("redo", _("redo"))
    FORCE = ("force", _("force"))
    # Presumably the "custom setting" mentioned above, as it is the only
    # value without a matching flag - confirm against the parser
    SKIP_NO_ARCHIVE = ("skip_noarchive", _("skip_noarchive"))
+
+
class ArchiveFileChoices(models.TextChoices):
    """
    Settings to control creation of an archive PDF file. These are the
    values accepted by the skip_archive_file configuration field.
    """

    NEVER = ("never", _("never"))
    WITH_TEXT = ("with_text", _("with_text"))
    ALWAYS = ("always", _("always"))
+
+
class CleanChoices(models.TextChoices):
    """
    The cleaning options accepted by the unpaper_clean configuration field.

    Matches to --clean, --clean-final
    """

    CLEAN = ("clean", _("clean"))
    FINAL = ("clean-final", _("clean-final"))
    # No cleaning flag at all
    NONE = ("none", _("none"))
+
+
class ColorConvertChoices(models.TextChoices):
    """
    The values accepted by the color_conversion_strategy configuration field.

    Refer to the Ghostscript documentation for valid options
    """

    UNCHANGED = ("LeaveColorUnchanged", _("LeaveColorUnchanged"))
    RGB = ("RGB", _("RGB"))
    INDEPENDENT = ("UseDeviceIndependentColor", _("UseDeviceIndependentColor"))
    GRAY = ("Gray", _("Gray"))
    CMYK = ("CMYK", _("CMYK"))
+
+
class ApplicationConfiguration(AbstractSingletonModel):
    """
    Stored application configuration, kept as a single row.

    Every field is nullable. The first field is a setting common across more
    than 1 parser, the remaining ones belong to the Tesseract based OCR parser.
    """

    # Common to more than one parser
    output_type = models.CharField(
        verbose_name=_("Sets the output PDF type"),
        null=True,
        blank=True,
        max_length=8,
        choices=OutputTypeChoices.choices,
    )

    """
    Settings for the Tesseract based OCR parser
    """

    # Page limits start at 1
    pages = models.PositiveIntegerField(
        verbose_name=_("Do OCR from page 1 to this value"),
        null=True,
        validators=[MinValueValidator(1)],
    )

    # Free text, there is no fixed list of choices for languages
    language = models.CharField(
        verbose_name=_("Do OCR using these languages"),
        null=True,
        blank=True,
        max_length=32,
    )

    mode = models.CharField(
        verbose_name=_("Sets the OCR mode"),
        null=True,
        blank=True,
        max_length=16,
        choices=ModeChoices.choices,
    )

    skip_archive_file = models.CharField(
        verbose_name=_("Controls the generation of an archive file"),
        null=True,
        blank=True,
        max_length=16,
        choices=ArchiveFileChoices.choices,
    )

    image_dpi = models.PositiveIntegerField(
        verbose_name=_("Sets image DPI fallback value"),
        null=True,
        validators=[MinValueValidator(1)],
    )

    # Can't call it clean, that's a model method
    unpaper_clean = models.CharField(
        verbose_name=_("Controls the unpaper cleaning"),
        null=True,
        blank=True,
        max_length=16,
        choices=CleanChoices.choices,
    )

    # Nullable booleans: None, True and False are three distinct states
    deskew = models.BooleanField(verbose_name=_("Enables deskew"), null=True)

    rotate_pages = models.BooleanField(
        verbose_name=_("Enables page rotation"),
        null=True,
    )

    # 0.0 passes validation, so it is a legitimate stored value
    rotate_pages_threshold = models.FloatField(
        verbose_name=_("Sets the threshold for rotation of pages"),
        null=True,
        validators=[MinValueValidator(0.0)],
    )

    # Given in pixels, with a minimum of one million
    max_image_pixels = models.FloatField(
        verbose_name=_("Sets the maximum image size for decompression"),
        null=True,
        validators=[MinValueValidator(1_000_000.0)],
    )

    color_conversion_strategy = models.CharField(
        verbose_name=_("Sets the Ghostscript color conversion strategy"),
        blank=True,
        null=True,
        max_length=32,
        choices=ColorConvertChoices.choices,
    )

    # Stored as JSON, so no decoding is needed when it is read back
    user_args = models.JSONField(
        verbose_name=_("Adds additional user arguments for OCRMyPDF"),
        null=True,
    )

    class Meta:
        verbose_name = _("paperless application settings")

    def __str__(self) -> str:  # pragma: no cover
        return "ApplicationConfiguration"
from django.contrib.auth.models import User
from rest_framework import serializers
+from paperless.models import ApplicationConfiguration
+
class ObfuscatedUserPasswordField(serializers.Field):
"""
"last_name",
"auth_token",
)
+
+
class ApplicationConfigurationSerializer(serializers.ModelSerializer):
    """
    Serializer for the ApplicationConfiguration model
    """

    class Meta:
        model = ApplicationConfiguration
        # Every model field is exposed
        fields = "__all__"
return int(os.getenv(key, default))
def __get_optional_int(key: str) -> Optional[int]:
    """
    Read an optional integer from the environment: None when the key is
    not present, otherwise the value as converted by __get_int
    """
    if key not in os.environ:
        return None
    # The -1 default is never used, the key is known to be present
    return __get_int(key, -1)  # pragma: no cover
+
+
def __get_float(key: str, default: float) -> float:
"""
Return a float value based on the environment variable or a default
def __get_path(
key: str,
- default: Optional[Union[PathLike, str]] = None,
-) -> Optional[Path]:
+ default: Union[PathLike, str],
+) -> Path:
"""
Return a normalized, absolute path based on the environment variable or a default,
- if provided. If not set and no default, returns None
+ if provided
"""
if key in os.environ:
return Path(os.environ[key]).resolve()
- elif default is not None:
- return Path(default).resolve()
- else:
- return None
+ return Path(default).resolve()
+
+
def __get_optional_path(key: str) -> Optional[Path]:
    """
    Read an optional path from the environment: None when the key is not
    present, otherwise the fully resolved Path produced by __get_path
    """
    if key not in os.environ:
        return None
    # "" is only a placeholder, __get_path reads the environment first
    return __get_path(key, "")
def __get_list(
]
# Optional to enable compression
-if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: nocover
+if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
ROOT_URLCONF = "paperless.urls"
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
-EMAIL_CERTIFICATE_FILE = __get_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
+EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
###############################################################################
"PATCHT",
)
-consumer_barcode_scanner_tmp: Final[str] = os.getenv(
+CONSUMER_BARCODE_SCANNER: Final[str] = os.getenv(
"PAPERLESS_CONSUMER_BARCODE_SCANNER",
"PYZBAR",
-)
-CONSUMER_BARCODE_SCANNER = consumer_barcode_scanner_tmp.upper()
+).upper()
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
"ASN",
)
-
-CONSUMER_BARCODE_UPSCALE: Final[float] = float(
- os.getenv("PAPERLESS_CONSUMER_BARCODE_UPSCALE", 0.0),
+CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
+ "PAPERLESS_CONSUMER_BARCODE_UPSCALE",
+ 0.0,
)
-
-CONSUMER_BARCODE_DPI: Final[str] = int(
- os.getenv("PAPERLESS_CONSUMER_BARCODE_DPI", 300),
-)
+CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
"PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
)
-OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
+OCR_PAGES = __get_optional_int("PAPERLESS_OCR_PAGES")
# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
OCR_SKIP_ARCHIVE_FILE = os.getenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")
-OCR_IMAGE_DPI = os.getenv("PAPERLESS_OCR_IMAGE_DPI")
+OCR_IMAGE_DPI = __get_optional_int("PAPERLESS_OCR_IMAGE_DPI")
OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")
-OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
+OCR_DESKEW: Final[bool] = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
-OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
+OCR_ROTATE_PAGES: Final[bool] = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
-OCR_ROTATE_PAGES_THRESHOLD = float(
- os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0),
+OCR_ROTATE_PAGES_THRESHOLD: Final[float] = __get_float(
+ "PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
+ 12.0,
)
-OCR_MAX_IMAGE_PIXELS: Optional[int] = None
-if os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS") is not None:
- OCR_MAX_IMAGE_PIXELS: int = int(os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS"))
+OCR_MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
+ "PAPERLESS_OCR_MAX_IMAGE_PIXELS",
+)
OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
"PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY",
"RGB",
)
-OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}")
+OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
# GNUPG needs a home directory for some reason
GNUPG_HOME = os.getenv("HOME", "/tmp")
from documents.views import UiSettingsView
from documents.views import UnifiedSearchViewSet
from paperless.consumers import StatusConsumer
+from paperless.views import ApplicationConfigurationViewSet
from paperless.views import FaviconView
from paperless.views import GenerateAuthTokenView
from paperless.views import GroupViewSet
api_router.register(r"share_links", ShareLinkViewSet)
api_router.register(r"consumption_templates", ConsumptionTemplateViewSet)
api_router.register(r"custom_fields", CustomFieldViewSet)
+api_router.register(r"config", ApplicationConfigurationViewSet)
urlpatterns = [
from documents.permissions import PaperlessObjectPermissions
from paperless.filters import GroupFilterSet
from paperless.filters import UserFilterSet
+from paperless.models import ApplicationConfiguration
+from paperless.serialisers import ApplicationConfigurationSerializer
from paperless.serialisers import GroupSerializer
from paperless.serialisers import ProfileSerializer
from paperless.serialisers import UserSerializer
class FaviconView(View):
- def get(self, request, *args, **kwargs): # pragma: nocover
+ def get(self, request, *args, **kwargs): # pragma: no cover
favicon = os.path.join(
os.path.dirname(__file__),
"static",
return Response(
token.key,
)
+
+
class ApplicationConfigurationViewSet(ModelViewSet):
    """
    API endpoint exposing the ApplicationConfiguration model.

    These settings apply to the whole installation, so being logged in must
    not be sufficient to read or change them.
    """

    model = ApplicationConfiguration

    queryset = ApplicationConfiguration.objects

    serializer_class = ApplicationConfigurationSerializer
    # IsAuthenticated alone let any logged-in user read and rewrite the
    # global configuration. PaperlessObjectPermissions is already imported
    # by this module.
    # NOTE(review): presumably it enforces the Django model permissions
    # (view/add/change/delete) per request method - confirm against
    # documents.permissions
    permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
M: MailBox,
message_uid: str,
parameter: str,
- ): # pragma: nocover
+ ): # pragma: no cover
"""
Perform mail action on the given mail uid in the mailbox.
"""
return AND(NOT(gmail_label=self.keyword), no_keyword=self.keyword)
else:
return {"no_keyword": self.keyword}
- else: # pragma: nocover
+ else: # pragma: no cover
raise ValueError("This should never happen.")
def post_consume(self, M: MailBox, message_uid: str, parameter: str):
elif rule.action == MailRule.MailAction.TAG:
return TagMailAction(rule.action_parameter, supports_gmail_labels)
else:
- raise NotImplementedError("Unknown action.") # pragma: nocover
+ raise NotImplementedError("Unknown action.") # pragma: no cover
def make_criterias(rule: MailRule, supports_gmail_labels: bool):
Returns the correct MailBox instance for the given configuration.
"""
ssl_context = ssl.create_default_context()
- if settings.EMAIL_CERTIFICATE_FILE is not None: # pragma: nocover
+ if settings.EMAIL_CERTIFICATE_FILE is not None: # pragma: no cover
ssl_context.load_verify_locations(cafile=settings.EMAIL_CERTIFICATE_FILE)
if security == MailAccount.ImapSecurity.NONE:
elif security == MailAccount.ImapSecurity.SSL:
mailbox = MailBox(server, port, ssl_context=ssl_context)
else:
- raise NotImplementedError("Unknown IMAP security") # pragma: nocover
+ raise NotImplementedError("Unknown IMAP security") # pragma: no cover
return mailbox
else:
raise NotImplementedError(
"Unknown title selector.",
- ) # pragma: nocover
+ ) # pragma: no cover
def _get_correspondent(
self,
else:
raise NotImplementedError(
"Unknown correspondent selector",
- ) # pragma: nocover
+ ) # pragma: no cover
def handle_mail_account(self, account: MailAccount):
"""
html_pdf = tempdir / "html.pdf"
html_pdf.write_bytes(response.content)
return html_pdf
+
def get_settings(self):
    """
    This parser does not implement additional settings yet, so there is
    no settings object to return
    """
    return None
-import json
import os
import re
import subprocess
import tempfile
from pathlib import Path
+from typing import TYPE_CHECKING
from typing import Optional
from django.conf import settings
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
+from paperless.config import OcrConfig
+from paperless.models import ArchiveFileChoices
+from paperless.models import CleanChoices
+from paperless.models import ModeChoices
class NoTextFoundException(Exception):
logging_name = "paperless.parsing.tesseract"
def get_settings(self) -> OcrConfig:
    """
    This parser uses the OCR configuration settings to parse documents.

    A new OcrConfig is built on every call, so the values are resolved at
    the time this method runs.
    """
    return OcrConfig()
+
def extract_metadata(self, document_path, mime_type):
result = []
if mime_type == "application/pdf":
self.logging_group,
)
- def is_image(self, mime_type):
+ def is_image(self, mime_type) -> bool:
return mime_type in [
"image/png",
"image/jpeg",
"image/webp",
]
- def has_alpha(self, image):
+ def has_alpha(self, image) -> bool:
with Image.open(image) as im:
return im.mode in ("RGBA", "LA")
],
)
- def get_dpi(self, image):
+ def get_dpi(self, image) -> Optional[int]:
try:
with Image.open(image) as im:
x, y = im.info["dpi"]
self.log.warning(f"Error while getting DPI from image {image}: {e}")
return None
- def calculate_a4_dpi(self, image):
+ def calculate_a4_dpi(self, image) -> Optional[int]:
try:
with Image.open(image) as im:
width, height = im.size
self.log.warning(f"Error while calculating DPI for image {image}: {e}")
return None
- def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path):
+ def extract_text(
+ self,
+ sidecar_file: Optional[Path],
+ pdf_file: Path,
+ ) -> Optional[str]:
# When re-doing OCR, the sidecar contains ONLY the new text, not
# the whole text, so do not utilize it in that case
if (
sidecar_file is not None
and os.path.isfile(sidecar_file)
- and settings.OCR_MODE != "redo"
+ and self.settings.mode != "redo"
):
text = self.read_file_handle_unicode_errors(sidecar_file)
sidecar_file,
safe_fallback=False,
):
+ if TYPE_CHECKING:
+ assert isinstance(self.settings, OcrConfig)
ocrmypdf_args = {
"input_file": input_file,
"output_file": output_file,
# processes via the task library.
"use_threads": True,
"jobs": settings.THREADS_PER_WORKER,
- "language": settings.OCR_LANGUAGE,
- "output_type": settings.OCR_OUTPUT_TYPE,
+ "language": self.settings.language,
+ "output_type": self.settings.output_type,
"progress_bar": False,
}
if "pdfa" in ocrmypdf_args["output_type"]:
ocrmypdf_args[
"color_conversion_strategy"
- ] = settings.OCR_COLOR_CONVERSION_STRATEGY
+ ] = self.settings.color_conversion_strategy
- if settings.OCR_MODE == "force" or safe_fallback:
+ if self.settings.mode == ModeChoices.FORCE or safe_fallback:
ocrmypdf_args["force_ocr"] = True
- elif settings.OCR_MODE in ["skip", "skip_noarchive"]:
+ elif self.settings.mode in {
+ ModeChoices.SKIP,
+ ModeChoices.SKIP_NO_ARCHIVE,
+ }:
ocrmypdf_args["skip_text"] = True
- elif settings.OCR_MODE == "redo":
+ elif self.settings.mode == ModeChoices.REDO:
ocrmypdf_args["redo_ocr"] = True
- else:
- raise ParseError(f"Invalid ocr mode: {settings.OCR_MODE}")
+ else: # pragma: no cover
+ raise ParseError(f"Invalid ocr mode: {self.settings.mode}")
- if settings.OCR_CLEAN == "clean":
+ if self.settings.clean == CleanChoices.CLEAN:
ocrmypdf_args["clean"] = True
- elif settings.OCR_CLEAN == "clean-final":
- if settings.OCR_MODE == "redo":
+ elif self.settings.clean == CleanChoices.FINAL:
+ if self.settings.mode == ModeChoices.REDO:
ocrmypdf_args["clean"] = True
else:
# --clean-final is not compatible with --redo-ocr
ocrmypdf_args["clean_final"] = True
- if settings.OCR_DESKEW and settings.OCR_MODE != "redo":
+ if self.settings.deskew and self.settings.mode != ModeChoices.REDO:
# --deskew is not compatible with --redo-ocr
ocrmypdf_args["deskew"] = True
- if settings.OCR_ROTATE_PAGES:
+ if self.settings.rotate:
ocrmypdf_args["rotate_pages"] = True
- ocrmypdf_args[
- "rotate_pages_threshold"
- ] = settings.OCR_ROTATE_PAGES_THRESHOLD
+ ocrmypdf_args["rotate_pages_threshold"] = self.settings.rotate_threshold
- if settings.OCR_PAGES > 0:
- ocrmypdf_args["pages"] = f"1-{settings.OCR_PAGES}"
+ if self.settings.pages is not None:
+ ocrmypdf_args["pages"] = f"1-{self.settings.pages}"
else:
# sidecar is incompatible with pages
ocrmypdf_args["sidecar"] = sidecar_file
if dpi:
self.log.debug(f"Detected DPI for image {input_file}: {dpi}")
ocrmypdf_args["image_dpi"] = dpi
- elif settings.OCR_IMAGE_DPI:
- ocrmypdf_args["image_dpi"] = settings.OCR_IMAGE_DPI
+ elif self.settings.image_dpi is not None:
+ ocrmypdf_args["image_dpi"] = self.settings.image_dpi
elif a4_dpi:
ocrmypdf_args["image_dpi"] = a4_dpi
else:
f"Image DPI of {ocrmypdf_args['image_dpi']} is low, OCR may fail",
)
- if settings.OCR_USER_ARGS:
+ if self.settings.user_args is not None:
try:
- user_args = json.loads(settings.OCR_USER_ARGS)
- ocrmypdf_args = {**ocrmypdf_args, **user_args}
+ ocrmypdf_args = {**ocrmypdf_args, **self.settings.user_args}
except Exception as e:
self.log.warning(
f"There is an issue with PAPERLESS_OCR_USER_ARGS, so "
f"they will not be used. Error: {e}",
)
- if settings.OCR_MAX_IMAGE_PIXELS is not None:
+ if self.settings.max_image_pixel is not None:
# Convert pixels to mega-pixels and provide to ocrmypdf
- max_pixels_mpixels = settings.OCR_MAX_IMAGE_PIXELS / 1_000_000.0
+ max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0
if max_pixels_mpixels > 0:
self.log.debug(
f"Calculated {max_pixels_mpixels} megapixels for OCR",
# If the original has text, and the user doesn't want an archive,
# we're done here
skip_archive_for_text = (
- settings.OCR_MODE == "skip_noarchive"
- or settings.OCR_SKIP_ARCHIVE_FILE in ["with_text", "always"]
+ self.settings.mode == ModeChoices.SKIP_NO_ARCHIVE
+ or self.settings.skip_archive_file
+ in {
+ ArchiveFileChoices.WITH_TEXT,
+ ArchiveFileChoices.ALWAYS,
+ }
)
if skip_archive_for_text and original_has_text:
self.log.debug("Document has text, skipping OCRmyPDF entirely.")
self.log.debug(f"Calling OCRmyPDF with args: {args}")
ocrmypdf.ocr(**args)
- if settings.OCR_SKIP_ARCHIVE_FILE != "always":
+ if self.settings.skip_archive_file != ArchiveFileChoices.ALWAYS:
self.archive_path = archive_path
self.text = self.extract_text(sidecar_file, archive_path)
import shutil
import tempfile
import uuid
-from contextlib import AbstractContextManager
from pathlib import Path
from unittest import mock
from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_tesseract.parsers import post_process_text
-image_to_string_calls = []
-
-
-def fake_convert(input_file, output_file, **kwargs):
- with open(input_file) as f:
- lines = f.readlines()
-
- for i, line in enumerate(lines):
- with open(output_file % i, "w") as f2:
- f2.write(line.strip())
-
-
-class FakeImageFile(AbstractContextManager):
- def __init__(self, fname):
- self.fname = fname
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- pass
-
- def __enter__(self):
- return os.path.basename(self.fname)
-
class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
SAMPLE_FILES = Path(__file__).resolve().parent / "samples"
self.assertEqual(params["sidecar"], "sidecar.txt")
with override_settings(OCR_CLEAN="none"):
+ parser = RasterisedDocumentParser(None)
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertNotIn("clean", params)
self.assertNotIn("clean_final", params)
with override_settings(OCR_CLEAN="clean"):
+ parser = RasterisedDocumentParser(None)
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertTrue(params["clean"])
self.assertNotIn("clean_final", params)
with override_settings(OCR_CLEAN="clean-final", OCR_MODE="skip"):
+ parser = RasterisedDocumentParser(None)
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertTrue(params["clean_final"])
self.assertNotIn("clean", params)
with override_settings(OCR_CLEAN="clean-final", OCR_MODE="redo"):
+ parser = RasterisedDocumentParser(None)
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertTrue(params["clean"])
self.assertNotIn("clean_final", params)
with override_settings(OCR_DESKEW=True, OCR_MODE="skip"):
+ parser = RasterisedDocumentParser(None)
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertTrue(params["deskew"])
with override_settings(OCR_DESKEW=True, OCR_MODE="redo"):
+ parser = RasterisedDocumentParser(None)
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertNotIn("deskew", params)
with override_settings(OCR_DESKEW=False, OCR_MODE="skip"):
+ parser = RasterisedDocumentParser(None)
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertNotIn("deskew", params)
with override_settings(OCR_MAX_IMAGE_PIXELS=1_000_001.0):
+ parser = RasterisedDocumentParser(None)
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertIn("max_image_mpixels", params)
self.assertAlmostEqual(params["max_image_mpixels"], 1, places=4)
with override_settings(OCR_MAX_IMAGE_PIXELS=-1_000_001.0):
+ parser = RasterisedDocumentParser(None)
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertNotIn("max_image_mpixels", params)
--- /dev/null
+import json
+
+from django.test import TestCase
+from django.test import override_settings
+
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import FileSystemAssertsMixin
+from paperless.models import ApplicationConfiguration
+from paperless.models import CleanChoices
+from paperless.models import ColorConvertChoices
+from paperless.models import ModeChoices
+from paperless.models import OutputTypeChoices
+from paperless_tesseract.parsers import RasterisedDocumentParser
+
+
+class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
+    """
+    Checks that OCR options stored in the ApplicationConfiguration database
+    row are used in preference to the matching Django settings when the
+    OCRmyPDF arguments are built
+    """
+
+    @staticmethod
+    def _store_db_config(**fields):
+        """
+        Helper to set the given fields on the first ApplicationConfiguration
+        row and save it
+        """
+        db_config = ApplicationConfiguration.objects.all().first()
+        for field_name, field_value in fields.items():
+            setattr(db_config, field_name, field_value)
+        db_config.save()
+
+    @staticmethod
+    def get_params():
+        """
+        Helper returning only the OCRmyPDF arguments built by a new parser
+        """
+        parser = RasterisedDocumentParser(None)
+        return parser.construct_ocrmypdf_parameters(
+            input_file="input.pdf",
+            output_file="output.pdf",
+            sidecar_file="sidecar.txt",
+            mime_type="application/pdf",
+            safe_fallback=False,
+        )
+
+    def test_db_settings_ocr_pages(self):
+        """
+        GIVEN:
+            - Django settings set OCR_PAGES to a different value than the
+              database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The page limit from the database is used
+        """
+        with override_settings(OCR_PAGES=10):
+            self._store_db_config(pages=5)
+            ocr_args = self.get_params()
+
+        self.assertEqual(ocr_args["pages"], "1-5")
+
+    def test_db_settings_ocr_language(self):
+        """
+        GIVEN:
+            - Django settings set OCR_LANGUAGE to a different value than the
+              database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The language from the database is used
+        """
+        with override_settings(OCR_LANGUAGE="eng+deu"):
+            self._store_db_config(language="fra+ita")
+            ocr_args = self.get_params()
+
+        self.assertEqual(ocr_args["language"], "fra+ita")
+
+    def test_db_settings_ocr_output_type(self):
+        """
+        GIVEN:
+            - Django settings set OCR_OUTPUT_TYPE to a different value than
+              the database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The output type from the database is used
+        """
+        with override_settings(OCR_OUTPUT_TYPE="pdfa-3"):
+            self._store_db_config(output_type=OutputTypeChoices.PDF_A)
+            ocr_args = self.get_params()
+
+        self.assertEqual(ocr_args["output_type"], "pdfa")
+
+    def test_db_settings_ocr_mode(self):
+        """
+        GIVEN:
+            - Django settings set OCR_MODE to a different value than the
+              database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The mode from the database is used
+        """
+        with override_settings(OCR_MODE="redo"):
+            self._store_db_config(mode=ModeChoices.SKIP)
+            ocr_args = self.get_params()
+
+        self.assertTrue(ocr_args["skip_text"])
+        self.assertNotIn("redo_ocr", ocr_args)
+        self.assertNotIn("force_ocr", ocr_args)
+
+    def test_db_settings_ocr_clean(self):
+        """
+        GIVEN:
+            - Django settings set OCR_CLEAN to a different value than the
+              database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The clean option from the database is used
+        """
+        # Database asks for "clean" while the Django setting is "clean-final"
+        with override_settings(OCR_CLEAN="clean-final"):
+            self._store_db_config(unpaper_clean=CleanChoices.CLEAN)
+            ocr_args = self.get_params()
+
+        self.assertTrue(ocr_args["clean"])
+        self.assertNotIn("clean_final", ocr_args)
+
+        # Database value is switched to the final clean choice
+        with override_settings(OCR_CLEAN="clean-final"):
+            self._store_db_config(unpaper_clean=CleanChoices.FINAL)
+            ocr_args = self.get_params()
+
+        self.assertTrue(ocr_args["clean_final"])
+        self.assertNotIn("clean", ocr_args)
+
+    def test_db_settings_ocr_deskew(self):
+        """
+        GIVEN:
+            - Django settings set OCR_DESKEW to a different value than the
+              database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The deskew flag from the database is used
+        """
+        with override_settings(OCR_DESKEW=False):
+            self._store_db_config(deskew=True)
+            ocr_args = self.get_params()
+
+        self.assertTrue(ocr_args["deskew"])
+
+    def test_db_settings_ocr_rotate(self):
+        """
+        GIVEN:
+            - Django settings set OCR_ROTATE_PAGES and
+              OCR_ROTATE_PAGES_THRESHOLD to different values than the
+              database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The rotation options from the database are used
+        """
+        with override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0):
+            self._store_db_config(rotate_pages=True, rotate_pages_threshold=15.0)
+            ocr_args = self.get_params()
+
+        self.assertTrue(ocr_args["rotate_pages"])
+        self.assertAlmostEqual(ocr_args["rotate_pages_threshold"], 15.0)
+
+    def test_db_settings_ocr_max_pixels(self):
+        """
+        GIVEN:
+            - Django settings set OCR_MAX_IMAGE_PIXELS to a different value
+              than the database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The pixel limit from the database is used
+        """
+        with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0):
+            self._store_db_config(max_image_pixels=1_000_000.0)
+            ocr_args = self.get_params()
+
+        self.assertAlmostEqual(ocr_args["max_image_mpixels"], 1.0)
+
+    def test_db_settings_ocr_color_convert(self):
+        """
+        GIVEN:
+            - Django settings set OCR_COLOR_CONVERSION_STRATEGY to a different
+              value than the database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The color conversion strategy from the database is used
+        """
+        with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"):
+            self._store_db_config(
+                color_conversion_strategy=ColorConvertChoices.INDEPENDENT,
+            )
+            ocr_args = self.get_params()
+
+        self.assertEqual(
+            ocr_args["color_conversion_strategy"],
+            "UseDeviceIndependentColor",
+        )
+
+    def test_ocr_user_args(self):
+        """
+        GIVEN:
+            - Django settings set OCR_USER_ARGS to a different value than the
+              database configuration
+        WHEN:
+            - OCR parameters are constructed
+        THEN:
+            - The user arguments from the database are used
+        """
+        env_user_args = json.dumps({"continue_on_soft_render_error": True})
+        with override_settings(OCR_USER_ARGS=env_user_args):
+            self._store_db_config(user_args={"unpaper_args": "--pre-rotate 90"})
+            ocr_args = self.get_params()
+
+        self.assertIn("unpaper_args", ocr_args)
+        self.assertEqual(
+            ocr_args["unpaper_args"],
+            "--pre-rotate 90",
+        )
def parse(self, document_path, mime_type, file_name=None):
self.text = self.read_file_handle_unicode_errors(document_path)
+
+    def get_settings(self):
+        """
+        This parser does not implement additional settings yet, so there is
+        no configuration object to provide and None is returned
+        """
+        return None
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
+from paperless.config import OutputTypeConfig
+from paperless.models import OutputTypeChoices
class TikaDocumentParser(DocumentParser):
document_path.read_bytes(),
mime_type,
)
- else: # pragma: nocover
+ else: # pragma: no cover
raise
except Exception as err:
raise ParseError(
timeout=settings.CELERY_TASK_TIME_LIMIT,
) as client, client.libre_office.to_pdf() as route:
# Set the output format of the resulting PDF
- if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
+ if settings.OCR_OUTPUT_TYPE in {
+ OutputTypeChoices.PDF_A,
+ OutputTypeChoices.PDF_A2,
+ }:
route.pdf_format(PdfAFormat.A2b)
- elif settings.OCR_OUTPUT_TYPE == "pdfa-1":
+ elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1:
route.pdf_format(PdfAFormat.A1a)
- elif settings.OCR_OUTPUT_TYPE == "pdfa-3":
+ elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3:
route.pdf_format(PdfAFormat.A3b)
route.convert(document_path)
raise ParseError(
f"Error while converting document to PDF: {err}",
) from err
+
+    def get_settings(self) -> OutputTypeConfig:
+        """
+        This parser only uses the PDF output type configuration currently,
+        so a newly constructed OutputTypeConfig is returned.
+
+        NOTE(review): the PDF conversion branch visible in this change still
+        compares settings.OCR_OUTPUT_TYPE directly - confirm whether it is
+        meant to read the output type from this configuration instead
+        """
+        return OutputTypeConfig()
exclude_also =
if settings.AUDIT_LOG_ENABLED:
if AUDIT_LOG_ENABLED:
+ if TYPE_CHECKING:
[mypy]
plugins = mypy_django_plugin.main, mypy_drf_plugin.main, numpy.typing.mypy_plugin