Vectara (#5069)

# Vectara Integration This PR provides integration with Vectara. Implemented here are: * langchain/vectorstore/vectara.py * tests/integration_tests/vectorstores/test_vectara.py * langchain/retrievers/vectara_retriever.py And two IPYNB notebooks to do more testing: * docs/modules/chains/index_examples/vectara_text_generation.ipynb * docs/modules/indexes/vectorstores/examples/vectara.ipynb --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
2025-09-13 13:36:15 +00:00 · 2023-05-24 01:24:58 -07:00
parent 9c4b43b494
commit c81fb88035
7 changed files with 1628 additions and 0 deletions
--- a/tests/integration_tests/vectorstores/test_vectara.py
+++ b/tests/integration_tests/vectorstores/test_vectara.py
@@ -0,0 +1,34 @@
+from langchain.docstore.document import Document
+from langchain.vectorstores.vectara import Vectara
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
+
+
+def get_abbr(s: str) -> str:
+    words = s.split(" ")  # Split the string into words
+    first_letters = [word[0] for word in words]  # Extract the first letter of each word
+    return "".join(first_letters)  # Join the first letters into a single string
+
+
+def test_vectara_add_documents() -> None:
+    """Test end to end construction and search."""
+
+    # start with some initial documents
+    texts = ["grounded generation", "retrieval augmented generation", "data privacy"]
+    docsearch: Vectara = Vectara.from_texts(
+        texts,
+        embedding=FakeEmbeddings(),
+        metadatas=[{"abbr": "gg"}, {"abbr": "rag"}, {"abbr": "dp"}],
+    )
+
+    # then add some additional documents
+    new_texts = ["large language model", "information retrieval", "question answering"]
+    docsearch.add_documents(
+        [Document(page_content=t, metadata={"abbr": get_abbr(t)}) for t in new_texts]
+    )
+
+    # finally do a similarity search to see if all works okay
+    output = docsearch.similarity_search("large language model", k=2)
+    assert output[0].page_content == "large language model"
+    assert output[0].metadata == {"abbr": "llm"}
+    assert output[1].page_content == "information retrieval"
+    assert output[1].metadata == {"abbr": "ir"}