@method_decorator(login_required, name="dispatch")
class ChatStreamingView(View):
    def post(self, request):
+       request.compress_exempt = True
        ai_config = AIConfig()
        if not ai_config.ai_enabled:
            return HttpResponseBadRequest("AI is required for this feature")
        response = StreamingHttpResponse(
            stream_chat_with_documents(query_str=question, documents=documents),
-           content_type="text/plain",
+           content_type="text/event-stream",
        )
        response["Cache-Control"] = "no-cache"
        return response
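
Assuming the view resolves `question` and `documents` from the request payload (that part is not shown in this excerpt), and assuming an endpoint path such as /api/chat/ with authentication handled separately, a client can read the stream incrementally. A minimal sketch using requests:

import requests

# Hypothetical URL and payload; authentication is omitted for brevity.
with requests.post(
    "http://localhost:8000/api/chat/",
    data={"q": "Summarise the latest invoice"},
    stream=True,
    timeout=60,
) as resp:
    resp.raise_for_status()
    # chunk_size=None yields data as soon as it arrives instead of waiting for
    # fixed-size blocks, which is what makes the chat output feel incremental.
    for chunk in resp.iter_content(chunk_size=None):
        print(chunk.decode("utf-8", errors="replace"), end="", flush=True)
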
import logging
+import sys
from llama_index.core import VectorStoreIndex
from llama_index.core.prompts import PromptTemplate
response_stream = query_engine.query(prompt)
- yield from response_stream.response_gen
+ for chunk in response_stream.response_gen:
+     yield chunk
+     sys.stdout.flush()
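
The lines above sit inside the streaming helper that the view calls. A minimal sketch of how `stream_chat_with_documents` could be assembled with llama_index is shown below; it assumes the documents expose `title` and `content` attributes and that an embedding model and LLM are already configured for llama_index, so it is an illustration rather than the project's actual implementation.

from llama_index.core import Document, VectorStoreIndex
from llama_index.core.prompts import PromptTemplate


def stream_chat_with_documents(query_str, documents):
    # Wrap the application's documents (assumed to have .title/.content)
    # into llama_index Documents and index them.
    index = VectorStoreIndex.from_documents(
        [Document(text=d.content, metadata={"title": d.title}) for d in documents],
    )
    # streaming=True makes query() return a streaming response whose
    # response_gen yields text chunks as the LLM produces them.
    query_engine = index.as_query_engine(streaming=True)
    prompt = PromptTemplate(
        "Answer the question using only the supplied documents.\n"
        "Question: {query_str}\n",
    ).format(query_str=query_str)
    response_stream = query_engine.query(prompt)
    for chunk in response_stream.response_gen:
        yield chunk
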
from urllib.parse import urlparse
from celery.schedules import crontab
+from compression_middleware.middleware import CompressionMiddleware
from concurrent_log_handler.queue import setup_logging_queues
from dateparser.languages.loader import LocaleDataLoader
from django.utils.translation import gettext_lazy as _
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"):  # pragma: no cover
    MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
+# Workaround to avoid compressing streaming responses (e.g. chat).
+# See https://github.com/friedelwolff/django-compression-middleware/pull/7
+original_process_response = CompressionMiddleware.process_response
+
+
+def patched_process_response(self, request, response):
+    if getattr(request, "compress_exempt", False):
+        return response
+    return original_process_response(self, request, response)
+
+
+CompressionMiddleware.process_response = patched_process_response
+
ROOT_URLCONF = "paperless.urls"
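
To illustrate the effect of the patch, the following test-style sketch (assumed usage in a configured Django test environment, not part of the settings file) checks that a request flagged with `compress_exempt` passes through the middleware untouched while an ordinary response is still compressed:

from django.http import HttpResponse
from django.test import RequestFactory

from compression_middleware.middleware import CompressionMiddleware

# Assumes the patched settings module above has already been imported.
factory = RequestFactory()
# Dummy downstream view returning a body large enough to normally be compressed.
middleware = CompressionMiddleware(lambda request: HttpResponse(b"x" * 4096))

exempt = factory.post("/hypothetical-chat/", HTTP_ACCEPT_ENCODING="gzip")
exempt.compress_exempt = True
assert not middleware(exempt).has_header("Content-Encoding")  # left uncompressed

normal = factory.get("/hypothetical-list/", HTTP_ACCEPT_ENCODING="gzip")
assert middleware(normal).has_header("Content-Encoding")  # still compressed
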