From c4c79da0711ab095895e1c79959e78748ad51f1b Mon Sep 17 00:00:00 2001
From: Ofer Mendelevitch
Date: Sat, 19 Aug 2023 13:59:52 -0700
Subject: [PATCH 01/58] Updated usage of metadata so that both part- and
 doc-level metadata are returned properly as a single metadata dict

Updated tests
---
 .../langchain/vectorstores/vectara.py         | 26 ++++---
 .../vectorstores/test_vectara.py              | 77 ++++++++++++-------
 2 files changed, 64 insertions(+), 39 deletions(-)

diff --git a/libs/langchain/langchain/vectorstores/vectara.py b/libs/langchain/langchain/vectorstores/vectara.py
index f263ebeac84..f1c2bc6ca9e 100644
--- a/libs/langchain/langchain/vectorstores/vectara.py
+++ b/libs/langchain/langchain/vectorstores/vectara.py
@@ -202,12 +202,12 @@ class Vectara(VectorStore):
             doc_metadata: optional metadata for the document
 
         This function indexes all the input text strings in the Vectara corpus as a
-        single Vectara document, where each input text is considered a "part" and the
-        metadata are associated with each part.
+        single Vectara document, where each input text is considered a "section" and the
+        metadata are associated with each section.
         if 'doc_metadata' is provided, it is associated with the Vectara document.
 
         Returns:
-            List of ids from adding the texts into the vectorstore.
+            document ID of the document added
 
         """
         doc_hash = md5()
@@ -307,21 +307,27 @@ class Vectara(VectorStore):
         result = response.json()
 
         responses = result["responseSet"][0]["response"]
-        vectara_default_metadata = ["lang", "len", "offset"]
+        documents = result["responseSet"][0]["document"]
+
+        metadatas = []
+        for x in responses:
+            md = { m["name"]: m["value"] for m in x["metadata"] }
+            doc_num = x['documentIndex']
+            doc_md = { m["name"]: m["value"] for m in documents[doc_num]['metadata'] }
+            md.update(doc_md)
+            metadatas.append(md)
+
         docs = [
             (
                 Document(
                     page_content=x["text"],
-                    metadata={
-                        m["name"]: m["value"]
-                        for m in x["metadata"]
-                        if m["name"] not in vectara_default_metadata
-                    },
+                    metadata=md,
                 ),
                 x["score"],
             )
-            for x in responses
+            for x,md in zip(responses,metadatas)
         ]
+
         return docs
 
     def similarity_search(
diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py b/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
index 57338e7f994..3b2decfc2f9 100644
--- a/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
@@ -5,12 +5,14 @@ from langchain.docstore.document import Document
 from langchain.vectorstores.vectara import Vectara
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 
-# For this test to run properly, please setup as follows
-# 1. Create a corpus in Vectara, with a filter attribute called "test_num".
-# 2. Create an API_KEY for this corpus with permissions for query and indexing
-# 3. Setup environment variables:
+#
+# For this test to run properly, please set up as follows:
+# 1. Create a Vectara account: sign up at https://console.vectara.com/signup
+# 2. Create a corpus in your Vectara account, with a filter attribute called "test_num".
+# 3. Create an API_KEY for this corpus with permissions for query and indexing
+# 4. Set up environment variables:
 #    VECTARA_API_KEY, VECTARA_CORPUS_ID and VECTARA_CUSTOMER_ID
-
+#
 
 def get_abbr(s: str) -> str:
     words = s.split(" ")  # Split the string into words
@@ -21,38 +23,52 @@ def get_abbr(s: str) -> str:
 
 def test_vectara_add_documents() -> None:
     """Test end to end construction and search."""
-    # start with some initial texts
-    texts = ["grounded generation", "retrieval augmented generation", "data privacy"]
-    docsearch: Vectara = Vectara.from_texts(
-        texts,
-        embedding=FakeEmbeddings(),
-        metadatas=[
-            {"abbr": "gg", "test_num": "1"},
-            {"abbr": "rag", "test_num": "1"},
-            {"abbr": "dp", "test_num": "1"},
-        ],
+    # create a new Vectara instance
+    docsearch: Vectara = Vectara()
+
+    # start with some initial texts, added with add_texts
+    texts1 = ["grounded generation", "retrieval augmented generation", "data privacy"]
+    md = [{"abbr": get_abbr(t)} for t in texts1]
+    doc_id1 = docsearch.add_texts(
+        texts1,
+        metadatas=md,
         doc_metadata={"test_num": "1"},
     )
 
-    # then add some additional documents
-    new_texts = ["large language model", "information retrieval", "question answering"]
-    docsearch.add_documents(
-        [Document(page_content=t, metadata={"abbr": get_abbr(t)}) for t in new_texts],
-        doc_metadata={"test_num": "1"},
+    # then add some additional documents, now with add_documents
+    texts2 = ["large language model", "information retrieval", "question answering"]
+    doc_id2 = docsearch.add_documents(
+        [Document(page_content=t, metadata={"abbr": get_abbr(t)}) for t in texts2],
+        doc_metadata={"test_num": "2"},
    )
+    doc_ids = doc_id1 + doc_id2
 
-    # finally do a similarity search to see if all works okay
-    output = docsearch.similarity_search(
+    # test without filter
+    output1 = docsearch.similarity_search(
         "large language model",
         k=2,
         n_sentence_context=0,
+    )
+    assert len(output1) == 2
+    assert output1[0].page_content == "large language model"
+    assert output1[0].metadata['abbr'] == "llm"
+    assert output1[1].page_content == "information retrieval"
+    assert output1[1].metadata['abbr'] == "ir"
+
+    # test with metadata filter (doc level)
+    # since the query does not match test_num=1 directly we get RAG as the matching result
+    output2 = docsearch.similarity_search(
+        "large language model",
+        k=1,
+        n_sentence_context=0,
         filter="doc.test_num = 1",
     )
-    assert output[0].page_content == "large language model"
-    assert output[0].metadata == {"abbr": "llm"}
-    assert output[1].page_content == "information retrieval"
-    assert output[1].metadata == {"abbr": "ir"}
+    assert len(output2) == 1
+    assert output2[0].page_content == "retrieval augmented generation"
+    assert output2[0].metadata['abbr'] == "rag"
 
+    for doc_id in doc_ids:
+        docsearch._delete_doc(doc_id)
 
 def test_vectara_from_files() -> None:
     """Test end to end construction and search."""
@@ -73,8 +89,9 @@ def test_vectara_from_files() -> None:
         urllib.request.urlretrieve(url, name)
         files_list.append(name)
 
-    docsearch: Vectara = Vectara.from_files(
-        files=files_list,
+    docsearch: Vectara = Vectara()
+    doc_ids = docsearch.add_files(
+        files_list=files_list,
         embedding=FakeEmbeddings(),
         metadatas=[{"url": url, "test_num": "2"} for url in urls],
     )
@@ -101,7 +118,6 @@ def test_vectara_from_files() -> None:
         n_sentence_context=1,
         filter="doc.test_num = 2",
     )
-    print(output[0].page_content)
     assert output[0].page_content == (
         """\
 Note the use of “hybrid” in 3) above is different from that used sometimes in the literature, \
 This classification scheme, however, misses a key insight gained in deep learning that generative \
 models can greatly improve the training of DNNs and other deep discriminative models via better regularization.\
 """  # noqa: E501
     )
+
+    for doc_id in doc_ids:
+        docsearch._delete_doc(doc_id)
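The core of this first patch is the merge of part-level and document-level metadata keyed by `documentIndex`. A minimal standalone sketch of that merge, run against a fabricated payload in the response shape the diff relies on (`responseSet[0]["response"]` and `responseSet[0]["document"]`; the values are invented, not a real Vectara response), may make the data flow easier to follow:

```python
# Sketch of the metadata merge introduced in PATCH 01, using a made-up payload.
response_set = {
    "response": [
        {
            "text": "retrieval augmented generation",
            "score": 0.87,
            "documentIndex": 0,
            "metadata": [{"name": "abbr", "value": "rag"}],  # part-level metadata
        }
    ],
    "document": [
        {"metadata": [{"name": "test_num", "value": "1"}]}  # doc-level metadata
    ],
}

for x in response_set["response"]:
    md = {m["name"]: m["value"] for m in x["metadata"]}
    doc_md = {
        m["name"]: m["value"]
        for m in response_set["document"][x["documentIndex"]]["metadata"]
    }
    md.update(doc_md)  # doc-level values win on key collisions
    print(md)  # {'abbr': 'rag', 'test_num': '1'}
```

Note that because the merge is `md.update(doc_md)`, a document-level key silently overrides a part-level key of the same name; that appears to be the intended precedence here.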
From 90fd840fb17e6733aa240af51828c99695c5cd53 Mon Sep 17 00:00:00 2001
From: Ofer Mendelevitch
Date: Sat, 19 Aug 2023 16:51:53 -0700
Subject: [PATCH 02/58] fixed formatting

---
 libs/langchain/langchain/vectorstores/vectara.py          | 8 ++++----
 .../tests/integration_tests/vectorstores/test_vectara.py  | 8 +++++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/libs/langchain/langchain/vectorstores/vectara.py b/libs/langchain/langchain/vectorstores/vectara.py
index f1c2bc6ca9e..cd8ee9c9fad 100644
--- a/libs/langchain/langchain/vectorstores/vectara.py
+++ b/libs/langchain/langchain/vectorstores/vectara.py
@@ -311,9 +311,9 @@ class Vectara(VectorStore):
 
         metadatas = []
         for x in responses:
-            md = { m["name"]: m["value"] for m in x["metadata"] }
-            doc_num = x['documentIndex']
-            doc_md = { m["name"]: m["value"] for m in documents[doc_num]['metadata'] }
+            md = {m["name"]: m["value"] for m in x["metadata"]}
+            doc_num = x["documentIndex"]
+            doc_md = {m["name"]: m["value"] for m in documents[doc_num]["metadata"]}
             md.update(doc_md)
             metadatas.append(md)
 
@@ -325,7 +325,7 @@ class Vectara(VectorStore):
                 ),
                 x["score"],
             )
-            for x,md in zip(responses,metadatas)
+            for x, md in zip(responses, metadatas)
         ]
 
         return docs
diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py b/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
index 3b2decfc2f9..5ed1d17343a 100644
--- a/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
@@ -14,6 +14,7 @@ from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 #    VECTARA_API_KEY, VECTARA_CORPUS_ID and VECTARA_CUSTOMER_ID
 #
 
+
 def get_abbr(s: str) -> str:
     words = s.split(" ")  # Split the string into words
     first_letters = [word[0] for word in words]  # Extract the first letter of each word
@@ -51,9 +52,9 @@ def test_vectara_add_documents() -> None:
     )
     assert len(output1) == 2
     assert output1[0].page_content == "large language model"
-    assert output1[0].metadata['abbr'] == "llm"
+    assert output1[0].metadata["abbr"] == "llm"
     assert output1[1].page_content == "information retrieval"
-    assert output1[1].metadata['abbr'] == "ir"
+    assert output1[1].metadata["abbr"] == "ir"
 
     # test with metadata filter (doc level)
     # since the query does not match test_num=1 directly we get RAG as the matching result
@@ -65,11 +66,12 @@ def test_vectara_add_documents() -> None:
     )
     assert len(output2) == 1
     assert output2[0].page_content == "retrieval augmented generation"
-    assert output2[0].metadata['abbr'] == "rag"
+    assert output2[0].metadata["abbr"] == "rag"
 
     for doc_id in doc_ids:
         docsearch._delete_doc(doc_id)
 
+
 def test_vectara_from_files() -> None:
     """Test end to end construction and search."""
From e92e199ec1e6a042a57fcbcea9ee022fc5432486 Mon Sep 17 00:00:00 2001
From: Ofer Mendelevitch
Date: Sat, 19 Aug 2023 16:59:50 -0700
Subject: [PATCH 03/58] fixed lint issue

---
 .../tests/integration_tests/vectorstores/test_vectara.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py b/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
index 5ed1d17343a..8fa3cd7f40d 100644
--- a/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
@@ -57,7 +57,7 @@ def test_vectara_add_documents() -> None:
     assert output1[1].metadata["abbr"] == "ir"
 
     # test with metadata filter (doc level)
-    # since the query does not match test_num=1 directly we get RAG as the matching result
+    # since the query does not match test_num=1 directly we get "RAG" as the result
     output2 = docsearch.similarity_search(
         "large language model",
         k=1,

From 00baddf34cb128f6d87b9bf5eb94e514ccbf832d Mon Sep 17 00:00:00 2001
From: Leonid Kuligin
Date: Mon, 28 Aug 2023 15:38:56 +0200
Subject: [PATCH 04/58] fixed enterprise search returning an empty array

---
 .../retrievers/google_cloud_enterprise_search.py | 12 +++++++++---
 .../test_google_cloud_enterprise_search.py       |  3 +++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/libs/langchain/langchain/retrievers/google_cloud_enterprise_search.py b/libs/langchain/langchain/retrievers/google_cloud_enterprise_search.py
index 3570047509f..4e9c478d2b6 100644
--- a/libs/langchain/langchain/retrievers/google_cloud_enterprise_search.py
+++ b/libs/langchain/langchain/retrievers/google_cloud_enterprise_search.py
@@ -114,7 +114,13 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
 
     def __init__(self, **data: Any) -> None:
         """Initializes private fields."""
-        from google.cloud.discoveryengine_v1beta import SearchServiceClient
+        try:
+            from google.cloud.discoveryengine_v1beta import SearchServiceClient
+        except ImportError:
+            raise ImportError(
+                "google.cloud.discoveryengine is not installed. "
+                "Please install it with pip install google-cloud-discoveryengine"
+            )
 
         super().__init__(**data)
         self._client = SearchServiceClient(credentials=self.credentials)
@@ -137,7 +143,7 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
             document_dict = MessageToDict(
                 result.document._pb, preserving_proto_field_name=True
             )
-            derived_struct_data = document_dict.get("derived_struct_data", None)
+            derived_struct_data = document_dict.get("derived_struct_data")
             if not derived_struct_data:
                 continue
 
@@ -150,7 +156,7 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
                 else "extractive_segments"
             )
 
-            for chunk in getattr(derived_struct_data, chunk_type, []):
+            for chunk in derived_struct_data.get(chunk_type, []):
                 doc_metadata["source"] = derived_struct_data.get("link", "")
 
                 if chunk_type == "extractive_answers":
diff --git a/libs/langchain/tests/integration_tests/retrievers/test_google_cloud_enterprise_search.py b/libs/langchain/tests/integration_tests/retrievers/test_google_cloud_enterprise_search.py
index 47f576ac296..86c80cfa278 100644
--- a/libs/langchain/tests/integration_tests/retrievers/test_google_cloud_enterprise_search.py
+++ b/libs/langchain/tests/integration_tests/retrievers/test_google_cloud_enterprise_search.py
@@ -24,6 +24,9 @@ def test_google_cloud_enterprise_search_get_relevant_documents() -> None:
     """Test the get_relevant_documents() method."""
     retriever = GoogleCloudEnterpriseSearchRetriever()
     documents = retriever.get_relevant_documents("What are Alphabet's Other Bets?")
+    assert len(documents) > 0
     for doc in documents:
         assert isinstance(doc, Document)
         assert doc.page_content
+        assert doc.metadata["id"]
+        assert doc.metadata["source"]
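The one-line change from `getattr(derived_struct_data, chunk_type, [])` to `derived_struct_data.get(chunk_type, [])` is the heart of the "empty array" bug: `MessageToDict` returns a plain `dict`, and `getattr` looks up attributes rather than keys, so the lookup always fell through to the `[]` default and no chunks were ever yielded. A tiny self-contained illustration (the sample values are invented):

```python
# Why getattr() on a dict always returned the default: dict keys are not
# attributes, so the attribute lookup misses and the [] fallback is used.
derived_struct_data = {
    "link": "https://example.com/doc",
    "extractive_answers": [{"content": "some snippet"}],
}

print(getattr(derived_struct_data, "extractive_answers", []))  # [] -- the bug
print(derived_struct_data.get("extractive_answers", []))  # [{'content': 'some snippet'}]
```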
From 9aaa0fdce084028632c305cadae6390e4e0d2ed6 Mon Sep 17 00:00:00 2001
From: Predrag Gruevski
Date: Mon, 28 Aug 2023 14:20:48 +0000
Subject: [PATCH 05/58] Use unified Python setup steps for release workflow.

---
 .github/workflows/_release.yml | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/_release.yml b/.github/workflows/_release.yml
index 5ddf79ca7c2..1d7668978ba 100644
--- a/.github/workflows/_release.yml
+++ b/.github/workflows/_release.yml
@@ -31,13 +31,15 @@ jobs:
         working-directory: ${{ inputs.working-directory }}
     steps:
       - uses: actions/checkout@v3
-      - name: Install poetry
-        run: pipx install "poetry==$POETRY_VERSION"
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v4
+
+      - name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
+        uses: "./.github/actions/poetry_setup"
         with:
           python-version: "3.10"
-          cache: "poetry"
+          poetry-version: ${{ env.POETRY_VERSION }}
+          working-directory: ${{ inputs.working-directory }}
+          cache-key: release
+
       - name: Build project for distribution
         run: poetry build
       - name: Check Version
From 97741d41c5fc75a2bedb5706f94d89d87ea74e1e Mon Sep 17 00:00:00 2001
From: hughcrt
Date: Mon, 28 Aug 2023 19:24:50 +0200
Subject: [PATCH 06/58] Add LLMonitorCallbackHandler

---
 .../integrations/callbacks/llmonitor.md        |  63 ++++
 libs/langchain/langchain/callbacks/__init__.py |   2 +
 .../langchain/callbacks/llmonitor_callback.py  | 319 ++++++++++++++++++
 3 files changed, 384 insertions(+)
 create mode 100644 docs/extras/integrations/callbacks/llmonitor.md
 create mode 100644 libs/langchain/langchain/callbacks/llmonitor_callback.py

diff --git a/docs/extras/integrations/callbacks/llmonitor.md b/docs/extras/integrations/callbacks/llmonitor.md
new file mode 100644
index 00000000000..57b1ec7c952
--- /dev/null
+++ b/docs/extras/integrations/callbacks/llmonitor.md
@@ -0,0 +1,63 @@
+# LLMonitor
+
+[LLMonitor](https://llmonitor.com) is an open-source observability platform that provides cost tracking, user tracking and powerful agent tracing.
+
+
+
+## Setup
+Create an account on [llmonitor.com](https://llmonitor.com), create an `App`, and then copy the associated `tracking id`.
+Once you have it, set it as an environment variable by running:
+```bash
+export LLMONITOR_APP_ID="..."
+```
+
+If you'd prefer not to set an environment variable, you can pass the key directly when initializing the callback handler:
+```python
+from langchain.callbacks import LLMonitorCallbackHandler
+
+handler = LLMonitorCallbackHandler(app_id="...")
+```
+
+## Usage with LLM/Chat models
+```python
+from langchain.llms import OpenAI
+from langchain.chat_models import ChatOpenAI
+from langchain.callbacks import LLMonitorCallbackHandler
+
+handler = LLMonitorCallbackHandler(app_id="...")
+
+llm = OpenAI(
+    callbacks=[handler],
+)
+
+chat = ChatOpenAI(
+    callbacks=[handler],
+    metadata={"userId": "123"},  # you can assign user ids to models in the metadata
+)
+```
+
+## Usage with agents
+```python
+from langchain.agents import load_tools, initialize_agent, AgentType
+from langchain.llms import OpenAI
+from langchain.callbacks import LLMonitorCallbackHandler
+
+handler = LLMonitorCallbackHandler(app_id="...")
+
+llm = OpenAI(temperature=0)
+tools = load_tools(["serpapi", "llm-math"], llm=llm)
+agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
+agent.run(
+    "Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?",
+    callbacks=[handler],
+    metadata={
+        "agentName": "Leo DiCaprio's girlfriend",  # you can assign a custom agent name in the metadata
+    },
+)
+```
+
+## Support
+For any question or issue with the integration, you can reach out to the LLMonitor team on [Discord](http://discord.com/invite/8PafSG58kK) or via [email](mailto:vince@llmonitor.com).
diff --git a/libs/langchain/langchain/callbacks/__init__.py b/libs/langchain/langchain/callbacks/__init__.py
index 8398741be34..12e18d52c7e 100644
--- a/libs/langchain/langchain/callbacks/__init__.py
+++ b/libs/langchain/langchain/callbacks/__init__.py
@@ -19,6 +19,7 @@ from langchain.callbacks.flyte_callback import FlyteCallbackHandler
 from langchain.callbacks.human import HumanApprovalCallbackHandler
 from langchain.callbacks.infino_callback import InfinoCallbackHandler
 from langchain.callbacks.labelstudio_callback import LabelStudioCallbackHandler
+from langchain.callbacks.llmonitor_callback import LLMonitorCallbackHandler
 from langchain.callbacks.manager import (
     get_openai_callback,
     tracing_enabled,
@@ -53,6 +54,7 @@ __all__ = [
     "HumanApprovalCallbackHandler",
     "InfinoCallbackHandler",
     "MlflowCallbackHandler",
+    "LLMonitorCallbackHandler",
     "OpenAICallbackHandler",
     "StdOutCallbackHandler",
     "AsyncIteratorCallbackHandler",
diff --git a/libs/langchain/langchain/callbacks/llmonitor_callback.py b/libs/langchain/langchain/callbacks/llmonitor_callback.py
new file mode 100644
index 00000000000..99ca1e92905
--- /dev/null
+++ b/libs/langchain/langchain/callbacks/llmonitor_callback.py
@@ -0,0 +1,319 @@
+import os
+import traceback
+from datetime import datetime
+from typing import Any, Dict, List, Literal, Optional, Union
+from uuid import UUID
+
+import requests
+
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.schema.agent import AgentAction, AgentFinish
+from langchain.schema.messages import BaseMessage
+from langchain.schema.output import LLMResult
+
+DEFAULT_API_URL = "https://app.llmonitor.com"
+
+
+def _parse_lc_role(role: str) -> Literal["user", "ai", "system", "function"] | None:
+    if role == "human":
+        return "user"
+    elif role == "ai":
+        return "ai"
+    elif role == "system":
+        return "system"
+    elif role == "function":
+        return "function"
+    else:
+        return None
+
+
+def _serialize_lc_message(message: BaseMessage) -> Dict[str, Any]:
+    return {"text": message.content, "role": _parse_lc_role(message.type)}
+
+
+class LLMonitorCallbackHandler(BaseCallbackHandler):
+    """Initializes the `LLMonitorCallbackHandler`.
+
+    #### Parameters:
+    - `app_id`: The app id of the app you want to report to. Defaults to
+      `None`, which means that `LLMONITOR_APP_ID` will be used.
+    - `api_url`: The url of the LLMonitor API. Defaults to `None`, which means
+      that either the `LLMONITOR_API_URL` environment variable or
+      `https://app.llmonitor.com` will be used.
+
+    #### Raises:
+    - `ValueError`: if `app_id` is not provided either as an argument or as an
+      environment variable.
+    - `ConnectionError`: if the connection to the API fails.
+
+    #### Example:
+    ```python
+    from langchain.llms import OpenAI
+    from langchain.callbacks import LLMonitorCallbackHandler
+
+    llmonitor_callback = LLMonitorCallbackHandler()
+    llm = OpenAI(callbacks=[llmonitor_callback], metadata={"userId": "user-123"})
+    llm.predict("Hello, how are you?")
+    ```
+    """
+
+    __api_url: str
+    __app_id: str
+
+    def __init__(self, app_id: str | None = None, api_url: str | None = None) -> None:
+        super().__init__()
+
+        self.__api_url = api_url or os.getenv("LLMONITOR_API_URL") or DEFAULT_API_URL
+
+        _app_id = app_id or os.getenv("LLMONITOR_APP_ID")
+        if _app_id is None:
+            raise ValueError(
+                "app_id must be provided either as an argument "
+                "or as an environment variable"
+            )
+        self.__app_id = _app_id
+
+        try:
+            res = requests.get(f"{self.__api_url}/api/app/{self.__app_id}")
+            if not res.ok:
+                raise ConnectionError()
+        except Exception as e:
+            raise ConnectionError(
+                f"Could not connect to the LLMonitor API at {self.__api_url}"
+            ) from e
+
+    def __send_event(self, event: Dict[str, Any]) -> None:
+        headers = {"Content-Type": "application/json"}
+        event = {**event, "app": self.__app_id, "timestamp": str(datetime.utcnow())}
+        data = {"events": event}
+        requests.post(headers=headers, url=f"{self.__api_url}/api/report", json=data)
+
+    def on_llm_start(
+        self,
+        serialized: Dict[str, Any],
+        prompts: List[str],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: List[str] | None = None,
+        metadata: Dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        event = {
+            "event": "start",
+            "type": "llm",
+            "userId": (metadata or {}).get("userId"),
+            "runId": str(run_id),
+            "parentRunId": str(parent_run_id) if parent_run_id else None,
+            "input": prompts[0],
+            "name": kwargs.get("invocation_params", {}).get("model_name"),
+            "tags": tags,
+            "metadata": metadata,
+        }
+        self.__send_event(event)
+
+    def on_chat_model_start(
+        self,
+        serialized: Dict[str, Any],
+        messages: List[List[BaseMessage]],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: List[str] | None = None,
+        metadata: Dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        event = {
+            "event": "start",
+            "type": "llm",
+            "userId": (metadata or {}).get("userId"),
+            "runId": str(run_id),
+            "parentRunId": str(parent_run_id) if parent_run_id else None,
+            "input": [_serialize_lc_message(message[0]) for message in messages],
+            "name": kwargs.get("invocation_params", {}).get("model_name"),
+            "tags": tags,
+            "metadata": metadata,
+        }
+        self.__send_event(event)
+
+    def on_llm_end(
+        self,
+        response: LLMResult,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> None:
+        token_usage = (response.llm_output or {}).get("token_usage", {})
+
+        event = {
+            "event": "end",
+            "type": "llm",
+            "runId": str(run_id),
+            "parent_run_id": str(parent_run_id) if parent_run_id else None,
+            "output": {"text": response.generations[0][0].text, "role": "ai"},
+            "tokensUsage": {
+                "prompt": token_usage.get("prompt_tokens", 0),
+                "completion": token_usage.get("completion_tokens", 0),
+            },
+        }
+        self.__send_event(event)
+
+    def on_llm_error(
+        self,
+        error: Exception | KeyboardInterrupt,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        event = {
+            "event": "error",
+            "type": "llm",
+            "runId": str(run_id),
+            "parent_run_id": str(parent_run_id) if parent_run_id else None,
+            "error": {"message": str(error), "stack": traceback.format_exc()},
+        }
+        self.__send_event(event)
+    def on_tool_start(
+        self,
+        serialized: Dict[str, Any],
+        input_str: str,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: List[str] | None = None,
+        metadata: Dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        event = {
+            "event": "start",
+            "type": "tool",
+            "userId": (metadata or {}).get("userId"),
+            "runId": str(run_id),
+            "parentRunId": str(parent_run_id) if parent_run_id else None,
+            "name": serialized.get("name"),
+            "input": input_str,
+            "tags": tags,
+            "metadata": metadata,
+        }
+        self.__send_event(event)
+
+    def on_tool_end(
+        self,
+        output: str,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: List[str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        event = {
+            "event": "end",
+            "type": "tool",
+            "runId": str(run_id),
+            "parent_run_id": str(parent_run_id) if parent_run_id else None,
+            "output": output,
+        }
+        self.__send_event(event)
+
+    def on_chain_start(
+        self,
+        serialized: Dict[str, Any],
+        inputs: Dict[str, Any],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: List[str] | None = None,
+        metadata: Dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        name = serialized.get("id", [None, None, None, None])[3]
+        type = "chain"
+
+        agentName = (metadata or {}).get("agentName")
+        if agentName is not None:
+            type = "agent"
+            name = agentName
+        if name == "AgentExecutor" or name == "PlanAndExecute":
+            type = "agent"
+        event = {
+            "event": "start",
+            "type": type,
+            "userId": (metadata or {}).get("userId"),
+            "runId": str(run_id),
+            "parentRunId": str(parent_run_id) if parent_run_id else None,
+            "input": inputs.get("input", inputs),
+            "tags": tags,
+            "metadata": metadata,
+            "name": name,
+        }
+
+        self.__send_event(event)
+
+    def on_chain_end(
+        self,
+        outputs: Dict[str, Any],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        event = {
+            "event": "end",
+            "type": "chain",
+            "runId": str(run_id),
+            "output": outputs.get("output", outputs),
+        }
+        self.__send_event(event)
+
+    def on_chain_error(
+        self,
+        error: Exception | KeyboardInterrupt,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        event = {
+            "event": "error",
+            "type": "chain",
+            "runId": str(run_id),
+            "parent_run_id": str(parent_run_id) if parent_run_id else None,
+            "error": {"message": str(error), "stack": traceback.format_exc()},
+        }
+        self.__send_event(event)
+
+    def on_agent_action(
+        self,
+        action: AgentAction,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        event = {
+            "event": "start",
+            "type": "tool",
+            "runId": str(run_id),
+            "parentRunId": str(parent_run_id) if parent_run_id else None,
+            "name": action.tool,
+            "input": action.tool_input,
+        }
+        self.__send_event(event)
+
+    def on_agent_finish(
+        self,
+        finish: AgentFinish,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        event = {
+            "event": "end",
+            "type": "agent",
+            "runId": str(run_id),
+            "parentRunId": str(parent_run_id) if parent_run_id else None,
+            "output": finish.return_values,
+        }
+        self.__send_event(event)
+
+
+__all__ = ["LLMonitorCallbackHandler"]
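One portability note on the new module: annotations such as `app_id: str | None = None` and the `Literal[...] | None` return type use PEP 604 union syntax, which is evaluated when the module is imported and therefore requires Python 3.10+ (or `from __future__ import annotations`). If older interpreters need to be supported, a `typing.Optional` spelling is equivalent; a small sketch of that alternative (hypothetical names, not part of the patch):

```python
# A Python 3.8-compatible sketch of the role mapping above, using
# typing.Optional instead of the PEP 604 "| None" union and a dict lookup
# instead of the if/elif chain.
from typing import Dict, Literal, Optional

Role = Literal["user", "ai", "system", "function"]
_ROLE_MAP: Dict[str, Role] = {
    "human": "user",
    "ai": "ai",
    "system": "system",
    "function": "function",
}


def parse_lc_role(role: str) -> Optional[Role]:
    # Unknown roles map to None, mirroring the original fall-through branch.
    return _ROLE_MAP.get(role)


assert parse_lc_role("human") == "user"
assert parse_lc_role("tool") is None
```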
From 3a4d4c940c71d67affecfd21503943648f75175f Mon Sep 17 00:00:00 2001
From: hughcrt
Date: Mon, 28 Aug 2023 19:26:33 +0200
Subject: [PATCH 07/58] Change video width

---
 docs/extras/integrations/callbacks/llmonitor.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/extras/integrations/callbacks/llmonitor.md b/docs/extras/integrations/callbacks/llmonitor.md
index 57b1ec7c952..daec3dad816 100644
--- a/docs/extras/integrations/callbacks/llmonitor.md
+++ b/docs/extras/integrations/callbacks/llmonitor.md
@@ -2,7 +2,7 @@
 
 [LLMonitor](https://llmonitor.com) is an open-source observability platform that provides cost tracking, user tracking and powerful agent tracing.
 
-