Bagatur/vectara nit (#9140)

Co-authored-by: Ofer Mendelevitch <ofer@vectara.com>
2025-09-03 20:16:52 +00:00 · 2023-08-11 15:32:03 -07:00
parent 9b64932e55
commit 45741bcc1b
3 changed files with 27 additions and 7 deletions
--- a/libs/langchain/langchain/vectorstores/vectara.py
+++ b/libs/langchain/langchain/vectorstores/vectara.py
@@ -245,7 +245,7 @@ class Vectara(VectorStore):
        k: int = 5,
        lambda_val: float = 0.025,
        filter: Optional[str] = None,
-        n_sentence_context: int = 0,
+        n_sentence_context: int = 2,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return Vectara documents most similar to query, along with scores.
@@ -259,7 +259,7 @@ class Vectara(VectorStore):
                https://docs.vectara.com/docs/search-apis/sql/filter-overview
                for more details.
            n_sentence_context: number of sentences before/after the matching segment
-                to add
+                to add, defaults to 2

        Returns:
            List of Documents most similar to the query and score for each.
@@ -328,7 +328,7 @@ class Vectara(VectorStore):
        k: int = 5,
        lambda_val: float = 0.025,
        filter: Optional[str] = None,
-        n_sentence_context: int = 0,
+        n_sentence_context: int = 2,
        **kwargs: Any,
    ) -> List[Document]:
        """Return Vectara documents most similar to query, along with scores.
@@ -341,7 +341,7 @@ class Vectara(VectorStore):
                https://docs.vectara.com/docs/search-apis/sql/filter-overview for more
                details.
            n_sentence_context: number of sentences before/after the matching segment
-                to add
+                to add, defaults to 2

        Returns:
            List of Documents most similar to the query
@@ -427,7 +427,7 @@ class VectaraRetriever(VectorStoreRetriever):
            "lambda_val": 0.025,
            "k": 5,
            "filter": "",
-            "n_sentence_context": "0",
+            "n_sentence_context": "2",
        }
    )
    """Search params.
--- a/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
@@ -86,7 +86,6 @@ def test_vectara_from_files() -> None:
        n_sentence_context=0,
        filter="doc.test_num = 2",
    )
-    print(output)
    assert output[0].page_content == (
        "By the commonly adopted machine learning tradition "
        "(e.g., Chapter 28 in Murphy, 2012; Deng and Li, 2013), it may be natural "
@@ -94,3 +93,24 @@ def test_vectara_from_files() -> None:
        "(e.g., DNNs) and deep probabilistic generative models (e.g., DBN, Deep "
        "Boltzmann Machine (DBM))."
    )
+
+    # Finally, search with n_sentence_context=1 and check the expanded passage
+    output = docsearch.similarity_search(
+        "By the commonly adopted machine learning tradition",
+        k=1,
+        n_sentence_context=1,
+        filter="doc.test_num = 2",
+    )
+    print(output[0].page_content)
+    assert output[0].page_content == (
+        """\
+Note the use of “hybrid” in 3) above is different from that used sometimes in the literature, \
+which for example refers to the hybrid systems for speech recognition feeding the output probabilities of a neural network into an HMM \
+(Bengio et al., 1991; Bourlard and Morgan, 1993; Morgan, 2012). \
+By the commonly adopted machine learning tradition (e.g., Chapter 28 in Murphy, 2012; Deng and Li, 2013), \
+it may be natural to just classify deep learning techniques into deep discriminative models (e.g., DNNs) \
+and deep probabilistic generative models (e.g., DBN, Deep Boltzmann Machine (DBM)). \
+This classification scheme, however, misses a key insight gained in deep learning research about how generative \
+models can greatly improve the training of DNNs and other deep discriminative models via better regularization.\
+"""  # noqa: E501
+    )