)
-def build_document_node(document) -> list[BaseNode]:
+def build_document_node(document: Document) -> list[BaseNode]:
"""
Given a Document, returns parsed Nodes ready for indexing.
"""
raise
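# A minimal sketch of the parsing step build_document_node is responsible
# for, assuming llama_index's SentenceSplitter and Document wrapper. The
# "document_id" metadata key matches the retrieval code further below;
# the use of document.pk and the splitter choice are assumptions, not the
# PR's actual body.
from llama_index.core import Document as LlamaDocument
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import BaseNode

def _build_document_node_sketch(document) -> list[BaseNode]:
    full_text = (document.title or "") + "\n" + (document.content or "")
    llama_doc = LlamaDocument(
        text=full_text,
        metadata={"document_id": str(document.pk)},
    )
    return SentenceSplitter().get_nodes_from_documents([llama_doc])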
-def remove_existing_document_nodes(document, index):
+def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
"""
Removes the existing docstore nodes for the given document from the index.
This is necessary because FAISS IndexFlatL2 is append-only.
storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
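# A minimal sketch of what remove_document_docstore_nodes describes:
# because FAISS IndexFlatL2 is append-only, stale nodes are instead
# filtered out of the index's docstore by their "document_id" metadata.
# docstore.docs and delete_document are standard llama_index docstore
# APIs; matching on document.pk is an assumption.
def _remove_document_docstore_nodes_sketch(document, index):
    stale_node_ids = [
        node_id
        for node_id, node in index.docstore.docs.items()
        if node.metadata.get("document_id") == str(document.pk)
    ]
    for node_id in stale_node_ids:
        index.docstore.delete_document(node_id)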
-def llm_index_add_or_update_document(document):
+def llm_index_add_or_update_document(document: Document):
"""
Adds or updates a document in the LLM index.
If the document already exists, it will be replaced.
index = load_or_build_index(storage_context, embed_model, nodes=new_nodes)
if index is None:
- # Nothing to index
return
- # Remove old nodes
- remove_existing_document_nodes(document, index)
+ remove_document_docstore_nodes(document, index)
index.insert_nodes(new_nodes)
storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
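# For illustration only: calling the upsert twice for the same document
# should leave a single set of nodes behind, because the old nodes are
# removed from the docstore before the new ones are inserted. This helper
# just counts a document's nodes to make that property easy to check; it
# is not part of the PR, and the document.pk / metadata key are assumptions.
def _count_document_nodes(document, index) -> int:
    return sum(
        1
        for node in index.docstore.docs.values()
        if node.metadata.get("document_id") == str(document.pk)
    )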
-def llm_index_remove_document(document):
+def llm_index_remove_document(document: Document):
+ """
+ Removes a document from the LLM index.
+ """
embed_model = get_embedding_model()
llama_settings.embed_model = embed_model
index = load_or_build_index(storage_context, embed_model)
if index is None:
- return # Nothing to remove
+ return
- # Remove old nodes
- remove_existing_document_nodes(document, index)
+ remove_document_docstore_nodes(document, index)
storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
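# Hypothetical wiring, for illustration only: keeping the LLM index in
# sync with the ORM via Django signals. The receiver names and the choice
# to run this synchronously (rather than from a background task) are
# assumptions, not part of the PR.
from django.db.models.signals import post_delete, post_save
from django.dispatch import receiver

@receiver(post_save, sender=Document)
def _llm_index_on_save(sender, instance, **kwargs):
    llm_index_add_or_update_document(instance)

@receiver(post_delete, sender=Document)
def _llm_index_on_delete(sender, instance, **kwargs):
    llm_index_remove_document(instance)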
index = load_or_build_index()
retriever = VectorIndexRetriever(index=index, similarity_top_k=top_k)
- # Build query from the document text
query_text = (document.title or "") + "\n" + (document.content or "")
results = retriever.retrieve(query_text)
- # Each result.node.metadata["document_id"] should match our stored doc
document_ids = [
int(node.metadata["document_id"])
for node in results