From 5edf819524e25bdbe81e728f305ee1bb11be5843 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Mon, 28 Aug 2023 09:30:59 -0400
Subject: [PATCH 01/19] Qdrant Client: Expose instance for creating client
 (#9706)

Expose classmethods to conveniently initialize the vectorstore.

The purpose of this PR is to make it easy for users to initialize an empty vectorstore that's properly pre-configured without having to index documents into it via `from_documents`.

This will make it easier for users to rely on the following indexing code: https://github.com/langchain-ai/langchain/pull/9614 to help manage data in the Qdrant vectorstore.
---
 libs/langchain/langchain/vectorstores/qdrant.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libs/langchain/langchain/vectorstores/qdrant.py b/libs/langchain/langchain/vectorstores/qdrant.py
index cdc5bea8efb..0be0766f311 100644
--- a/libs/langchain/langchain/vectorstores/qdrant.py
+++ b/libs/langchain/langchain/vectorstores/qdrant.py
@@ -1298,7 +1298,7 @@ class Qdrant(VectorStore):
                 embeddings = OpenAIEmbeddings()
                 qdrant = Qdrant.from_texts(texts, embeddings, "localhost")
         """
-        qdrant = cls._construct_instance(
+        qdrant = cls.construct_instance(
             texts,
             embedding,
             location,
@@ -1474,7 +1474,7 @@ class Qdrant(VectorStore):
                 embeddings = OpenAIEmbeddings()
                 qdrant = await Qdrant.afrom_texts(texts, embeddings, "localhost")
         """
-        qdrant = await cls._aconstruct_instance(
+        qdrant = await cls.aconstruct_instance(
             texts,
             embedding,
             location,
@@ -1510,7 +1510,7 @@ class Qdrant(VectorStore):
         return qdrant
 
     @classmethod
-    def _construct_instance(
+    def construct_instance(
         cls: Type[Qdrant],
         texts: List[str],
         embedding: Embeddings,
@@ -1676,7 +1676,7 @@ class Qdrant(VectorStore):
         return qdrant
 
     @classmethod
-    async def _aconstruct_instance(
+    async def aconstruct_instance(
         cls: Type[Qdrant],
         texts: List[str],
         embedding: Embeddings,

From f97d3a76e7a500470c856f83d397853d189f9134 Mon Sep 17 00:00:00 2001
From: XUEYANZ
Date: Mon, 28 Aug 2023 09:38:34 -0400
Subject: [PATCH 02/19] Update CONTRIBUTING.md (#9817)

Hi LangChain :) Thank you for such a great project! I was going through the CONTRIBUTING.md and found a few minor issues.
---
 .github/CONTRIBUTING.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index e4d1e54e33a..9a03948a724 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -44,7 +44,7 @@ If you are adding an issue, please try to keep it focused on a single, modular b
 If two issues are related, or blocking, please link them rather than combining them.
 
 We will try to keep these issues as up to date as possible, though
-with the rapid rate of develop in this field some may get out of date.
+with the rapid rate of development in this field some may get out of date.
 If you notice this happening, please let us know.
 
 ### 🙋Getting Help
@@ -87,7 +87,7 @@ This will install all requirements for running the package, examples, linting, f
 
 ❗Note: If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running Poetry v1.5.1. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. If you are still seeing this bug on v1.5.1, you may also try disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.
 
-Now, you should be able to run the common tasks in the following section. To double check, run `make test`, all tests should pass. If they don't you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`. +Now assuming `make` and `pytest` are installed, you should be able to run the common tasks in the following section. To double check, run `make test` under `libs/langchain`, all tests should pass. If they don't, you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`. ## ✅ Common Tasks @@ -134,7 +134,7 @@ We recognize linting can be annoying - if you do not want to do it, please conta ### Spellcheck Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell). -Note that `codespell` finds common typos, so could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words. +Note that `codespell` finds common typos, so it could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words. To check spelling for this project: From 5e2d0cf54eded18f46ed9c40383a481f85872757 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 28 Aug 2023 07:27:07 -0700 Subject: [PATCH 03/19] bump 275 (#9860) --- libs/langchain/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 3a5f392a9a4..795b5c4a538 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain" -version = "0.0.274" +version = "0.0.275" description = "Building applications with LLMs through composability" authors = [] license = "MIT" From cb642ef6584b1d6ec9bded6d207276e0e9e581d1 Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Mon, 28 Aug 2023 09:15:05 -0700 Subject: [PATCH 04/19] Return feedback (#9629) Return the feedback values in an eval run result Also made a helper method to display as a dataframe but it may be overkill --- .../langchain/callbacks/tracers/evaluation.py | 20 +++-- .../smith/evaluation/runner_utils.py | 90 +++++++++++++------ .../smith/evaluation/test_runner_utils.py | 21 ++--- 3 files changed, 86 insertions(+), 45 deletions(-) diff --git a/libs/langchain/langchain/callbacks/tracers/evaluation.py b/libs/langchain/langchain/callbacks/tracers/evaluation.py index 877836b0a5c..5b178c84e6e 100644 --- a/libs/langchain/langchain/callbacks/tracers/evaluation.py +++ b/libs/langchain/langchain/callbacks/tracers/evaluation.py @@ -3,10 +3,11 @@ from __future__ import annotations import logging from concurrent.futures import Future, ThreadPoolExecutor, wait -from typing import Any, List, Optional, Sequence, Set, Union +from typing import Any, Dict, List, Optional, Sequence, Set, Union from uuid import UUID -from langsmith import Client, RunEvaluator +import langsmith +from langsmith import schemas as langsmith_schemas from langchain.callbacks.manager import tracing_v2_enabled from langchain.callbacks.tracers.base import BaseTracer @@ -62,13 +63,13 @@ class EvaluatorCallbackHandler(BaseTracer): The LangSmith project name to be organize eval chain runs under. 
""" - name: str = "evaluator_callback_handler" + name = "evaluator_callback_handler" def __init__( self, - evaluators: Sequence[RunEvaluator], + evaluators: Sequence[langsmith.RunEvaluator], max_workers: Optional[int] = None, - client: Optional[Client] = None, + client: Optional[langsmith.Client] = None, example_id: Optional[Union[UUID, str]] = None, skip_unfinished: bool = True, project_name: Optional[str] = "evaluators", @@ -86,10 +87,11 @@ class EvaluatorCallbackHandler(BaseTracer): self.futures: Set[Future] = set() self.skip_unfinished = skip_unfinished self.project_name = project_name + self.logged_feedback: Dict[str, List[langsmith_schemas.Feedback]] = {} global _TRACERS _TRACERS.append(self) - def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None: + def _evaluate_in_project(self, run: Run, evaluator: langsmith.RunEvaluator) -> None: """Evaluate the run in the project. Parameters @@ -102,11 +104,11 @@ class EvaluatorCallbackHandler(BaseTracer): """ try: if self.project_name is None: - self.client.evaluate_run(run, evaluator) + feedback = self.client.evaluate_run(run, evaluator) with tracing_v2_enabled( project_name=self.project_name, tags=["eval"], client=self.client ): - self.client.evaluate_run(run, evaluator) + feedback = self.client.evaluate_run(run, evaluator) except Exception as e: logger.error( f"Error evaluating run {run.id} with " @@ -114,6 +116,8 @@ class EvaluatorCallbackHandler(BaseTracer): exc_info=True, ) raise e + example_id = str(run.reference_example_id) + self.logged_feedback.setdefault(example_id, []).append(feedback) def _persist_run(self, run: Run) -> None: """Run the evaluator on the run. diff --git a/libs/langchain/langchain/smith/evaluation/runner_utils.py b/libs/langchain/langchain/smith/evaluation/runner_utils.py index 9e06fcd65f4..438bc791400 100644 --- a/libs/langchain/langchain/smith/evaluation/runner_utils.py +++ b/libs/langchain/langchain/smith/evaluation/runner_utils.py @@ -11,6 +11,7 @@ import uuid import warnings from enum import Enum from typing import ( + TYPE_CHECKING, Any, Callable, Coroutine, @@ -44,6 +45,9 @@ from langchain.schema.runnable import Runnable, RunnableConfig, RunnableLambda from langchain.smith.evaluation.config import EvalConfig, RunEvalConfig from langchain.smith.evaluation.string_run_evaluator import StringRunEvaluatorChain +if TYPE_CHECKING: + import pandas as pd + logger = logging.getLogger(__name__) MODEL_OR_CHAIN_FACTORY = Union[ @@ -63,6 +67,31 @@ class InputFormatError(Exception): ## Shared Utilities +class TestResult(dict): + """A dictionary of the results of a single test run.""" + + def to_dataframe(self) -> pd.DataFrame: + """Convert the results to a dataframe.""" + try: + import pandas as pd + except ImportError as e: + raise ImportError( + "Pandas is required to convert the results to a dataframe." + " to install pandas, run `pip install pandas`." 
+ ) from e + + indices = [] + records = [] + for example_id, result in self["results"].items(): + feedback = result["feedback"] + records.append( + {**{f.key: f.score for f in feedback}, "output": result["output"]} + ) + indices.append(example_id) + + return pd.DataFrame(records, index=indices) + + def _get_eval_project_url(api_url: str, project_id: str) -> str: """Get the project url from the api url.""" parsed = urlparse(api_url) @@ -667,7 +696,7 @@ async def _arun_llm_or_chain( tags: Optional[List[str]] = None, callbacks: Optional[List[BaseCallbackHandler]] = None, input_mapper: Optional[Callable[[Dict], Any]] = None, -) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]: +) -> Union[dict, str, LLMResult, ChatResult]: """Asynchronously run the Chain or language model. Args: @@ -689,10 +718,10 @@ async def _arun_llm_or_chain( tracer.example_id = example.id else: previous_example_ids = None - outputs = [] chain_or_llm = ( "LLM" if isinstance(llm_or_chain_factory, BaseLanguageModel) else "Chain" ) + result = None try: if isinstance(llm_or_chain_factory, BaseLanguageModel): output: Any = await _arun_llm( @@ -711,15 +740,15 @@ async def _arun_llm_or_chain( callbacks=callbacks, input_mapper=input_mapper, ) - outputs.append(output) + result = output except Exception as e: logger.warning(f"{chain_or_llm} failed for example {example.id}. Error: {e}") - outputs.append({"Error": str(e)}) + result = {"Error": str(e)} if callbacks and previous_example_ids: for example_id, tracer in zip(previous_example_ids, callbacks): if hasattr(tracer, "example_id"): tracer.example_id = example_id - return outputs + return result async def _gather_with_concurrency( @@ -856,7 +885,7 @@ async def _arun_on_examples( wrapped_model, examples, evaluation, data_type ) examples = _validate_example_inputs(examples, wrapped_model, input_mapper) - results: Dict[str, List[Any]] = {} + results: Dict[str, dict] = {} async def process_example( example: Example, callbacks: List[BaseCallbackHandler], job_state: dict @@ -869,7 +898,7 @@ async def _arun_on_examples( callbacks=callbacks, input_mapper=input_mapper, ) - results[str(example.id)] = result + results[str(example.id)] = {"output": result} job_state["num_processed"] += 1 if verbose: print( @@ -890,8 +919,14 @@ async def _arun_on_examples( ), *(functools.partial(process_example, e) for e in examples), ) + all_feedback = {} for handler in evaluation_handlers: handler.wait_for_futures() + all_feedback.update(handler.logged_feedback) + # join the results and feedback on the example id + for example_id, output_dict in results.items(): + feedback = all_feedback.get(example_id, []) + output_dict["feedback"] = feedback return results @@ -978,7 +1013,7 @@ def _run_llm_or_chain( tags: Optional[List[str]] = None, callbacks: Optional[List[BaseCallbackHandler]] = None, input_mapper: Optional[Callable[[Dict], Any]] = None, -) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]: +) -> Union[dict, str, LLMResult, ChatResult]: """ Run the Chain or language model synchronously. 
@@ -1001,10 +1036,10 @@ def _run_llm_or_chain( tracer.example_id = example.id else: previous_example_ids = None - outputs = [] chain_or_llm = ( "LLM" if isinstance(llm_or_chain_factory, BaseLanguageModel) else "Chain" ) + result = None try: if isinstance(llm_or_chain_factory, BaseLanguageModel): output: Any = _run_llm( @@ -1023,18 +1058,18 @@ def _run_llm_or_chain( tags=tags, input_mapper=input_mapper, ) - outputs.append(output) + result = output except Exception as e: logger.warning( f"{chain_or_llm} failed for example {example.id} with inputs:" f" {example.inputs}.\nError: {e}", ) - outputs.append({"Error": str(e)}) + result = {"Error": str(e)} if callbacks and previous_example_ids: for example_id, tracer in zip(previous_example_ids, callbacks): if hasattr(tracer, "example_id"): tracer.example_id = example_id - return outputs + return result def _run_on_examples( @@ -1075,7 +1110,7 @@ def _run_on_examples( Returns: A dictionary mapping example ids to the model outputs. """ - results: Dict[str, Any] = {} + results: Dict[str, dict] = {} wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory) project_name = _get_project_name(project_name, wrapped_model) tracer = LangChainTracer( @@ -1085,11 +1120,11 @@ def _run_on_examples( wrapped_model, examples, evaluation, data_type ) examples = _validate_example_inputs(examples, wrapped_model, input_mapper) - evalution_handler = EvaluatorCallbackHandler( + evaluation_handler = EvaluatorCallbackHandler( evaluators=run_evaluators or [], client=client, ) - callbacks: List[BaseCallbackHandler] = [tracer, evalution_handler] + callbacks: List[BaseCallbackHandler] = [tracer, evaluation_handler] for i, example in enumerate(examples): result = _run_llm_or_chain( example, @@ -1100,9 +1135,14 @@ def _run_on_examples( ) if verbose: print(f"{i+1} processed", flush=True, end="\r") - results[str(example.id)] = result + results[str(example.id)] = {"output": result} tracer.wait_for_futures() - evalution_handler.wait_for_futures() + evaluation_handler.wait_for_futures() + all_feedback = evaluation_handler.logged_feedback + # join the results and feedback on the example id + for example_id, output_dict in results.items(): + feedback = all_feedback.get(example_id, []) + output_dict["feedback"] = feedback return results @@ -1276,10 +1316,10 @@ async def arun_on_dataset( input_mapper=input_mapper, data_type=dataset.data_type, ) - return { - "project_name": project_name, - "results": results, - } + return TestResult( + project_name=project_name, + results=results, + ) def _handle_coroutine(coro: Coroutine) -> Any: @@ -1461,7 +1501,7 @@ def run_on_dataset( data_type=dataset.data_type, ) results = _handle_coroutine(coro) - return { - "project_name": project_name, - "results": results, - } + return TestResult( + project_name=project_name, + results=results, + ) diff --git a/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py b/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py index 5c34f9032fa..914958031dc 100644 --- a/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py +++ b/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py @@ -182,14 +182,12 @@ def test_run_llm_or_chain_with_input_mapper() -> None: return {"the right input": inputs["the wrong input"]} result = _run_llm_or_chain(example, lambda: mock_chain, input_mapper=input_mapper) - assert len(result) == 1 - assert result[0] == {"output": "2", "the right input": "1"} + assert result == {"output": "2", "the right input": "1"} bad_result = 
_run_llm_or_chain(
        example,
        lambda: mock_chain,
    )
-    assert len(bad_result) == 1
-    assert "Error" in bad_result[0]
+    assert "Error" in bad_result
 
     # Try with LLM
     def llm_input_mapper(inputs: dict) -> str:
@@ -197,9 +195,7 @@ def test_run_llm_or_chain_with_input_mapper() -> None:
         return "the right input"
 
     mock_llm = FakeLLM(queries={"the right input": "somenumber"})
-    result = _run_llm_or_chain(example, mock_llm, input_mapper=llm_input_mapper)
-    assert len(result) == 1
-    llm_result = result[0]
+    llm_result = _run_llm_or_chain(example, mock_llm, input_mapper=llm_input_mapper)
     assert isinstance(llm_result, str)
     assert llm_result == "somenumber"
@@ -300,8 +296,8 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
         tags: Optional[List[str]] = None,
         callbacks: Optional[Any] = None,
         **kwargs: Any,
-    ) -> List[Dict[str, Any]]:
-        return [{"result": f"Result for example {example.id}"}]
+    ) -> Dict[str, Any]:
+        return {"result": f"Result for example {example.id}"}
 
     def mock_create_project(*args: Any, **kwargs: Any) -> Any:
         proj = mock.MagicMock()
@@ -328,9 +324,10 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
     )
 
     expected = {
-        uuid_: [
-            {"result": f"Result for example {uuid.UUID(uuid_)}"} for _ in range(1)
-        ]
+        uuid_: {
+            "output": {"result": f"Result for example {uuid.UUID(uuid_)}"},
+            "feedback": [],
+        }
         for uuid_ in uuids
     }
     assert results["results"] == expected

From 7f5713b80ae13e05aea31bc11a671539f8b0fa35 Mon Sep 17 00:00:00 2001
From: eryk-dsai <142571618+eryk-dsai@users.noreply.github.com>
Date: Mon, 28 Aug 2023 18:52:55 +0200
Subject: [PATCH 05/19] feat: grammar-based sampling in llama-cpp (#9712)

## Description

The following PR enables [grammar-based sampling](https://github.com/ggerganov/llama.cpp/tree/master/grammars) in the llama-cpp LLM. In short, loading a file with a formal grammar definition will constrain model outputs. For instance, one can force the model to generate valid JSON or to generate only Python lists.
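A minimal usage sketch (the model path below is a placeholder; `json.gbnf` is the JSON grammar this PR adds under `libs/langchain/langchain/llms/grammars/`):

```python
from langchain.llms import LlamaCpp

llm = LlamaCpp(
    # Placeholder path -- point this at your own ggml model file.
    model_path="./models/llama-2-7b.ggmlv3.q4_0.bin",
    # Constrains sampling so every completion parses as JSON.
    grammar_path="./langchain/llms/grammars/json.gbnf",
)

print(llm("Describe a person in JSON format:"))
```

Alternatively, a grammar can be passed inline as a string via the new `grammar` kwarg; at most one of `grammar` and `grammar_path` may be set.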
In the follow-up PR we will add: * docs with some description why it is cool and how it works * maybe some code sample for some task such as in llama repo --------- Co-authored-by: Lance Martin Co-authored-by: Bagatur --- docs/extras/integrations/llms/llamacpp.ipynb | 563 ++++++++++++++++-- .../langchain/llms/grammars/json.gbnf | 29 + .../langchain/llms/grammars/list.gbnf | 14 + libs/langchain/langchain/llms/llamacpp.py | 62 +- 4 files changed, 620 insertions(+), 48 deletions(-) create mode 100644 libs/langchain/langchain/llms/grammars/json.gbnf create mode 100644 libs/langchain/langchain/llms/grammars/list.gbnf diff --git a/docs/extras/integrations/llms/llamacpp.ipynb b/docs/extras/integrations/llms/llamacpp.ipynb index 68c8680b050..b40001d8e02 100644 --- a/docs/extras/integrations/llms/llamacpp.ipynb +++ b/docs/extras/integrations/llms/llamacpp.ipynb @@ -171,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": { "tags": [] }, @@ -192,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": { "tags": [] }, @@ -207,7 +207,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": { "tags": [] }, @@ -397,16 +397,96 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n", + "llama_model_load_internal: format = ggjt v3 (latest)\n", + "llama_model_load_internal: n_vocab = 32000\n", + "llama_model_load_internal: n_ctx = 512\n", + "llama_model_load_internal: n_embd = 5120\n", + "llama_model_load_internal: n_mult = 256\n", + "llama_model_load_internal: n_head = 40\n", + "llama_model_load_internal: n_head_kv = 40\n", + "llama_model_load_internal: n_layer = 40\n", + "llama_model_load_internal: n_rot = 128\n", + "llama_model_load_internal: n_gqa = 1\n", + "llama_model_load_internal: rnorm_eps = 5.0e-06\n", + "llama_model_load_internal: n_ff = 13824\n", + "llama_model_load_internal: freq_base = 10000.0\n", + "llama_model_load_internal: freq_scale = 1\n", + "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", + "llama_model_load_internal: model size = 13B\n", + "llama_model_load_internal: ggml ctx size = 0.11 MB\n", + "llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n", + "llama_new_context_with_model: kv self size = 400.00 MB\n", + "ggml_metal_init: allocating\n", + "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", + "ggml_metal_init: loaded kernel_add 0x1405ed6b0\n", + "ggml_metal_init: loaded kernel_add_row 0x1405eee00\n", + "ggml_metal_init: loaded kernel_mul 0x1405ee650\n", + "ggml_metal_init: loaded kernel_mul_row 0x1405eda20\n", + "ggml_metal_init: loaded kernel_scale 0x121fc1d80\n", + "ggml_metal_init: loaded kernel_silu 0x121fc1fe0\n", + "ggml_metal_init: loaded kernel_relu 0x121fc2240\n", + "ggml_metal_init: loaded kernel_gelu 0x121fc24e0\n", + "ggml_metal_init: loaded kernel_soft_max 0x121fc2950\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x121fc2d60\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x121fc3160\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x121fc3a20\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x121fc4170\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x121fc4890\n", + "ggml_metal_init: loaded 
kernel_get_rows_q3_K 0x121fc5010\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x121fc5750\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x121fc5e90\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x121fc65d0\n", + "ggml_metal_init: loaded kernel_rms_norm 0x121fc6d20\n", + "ggml_metal_init: loaded kernel_norm 0x121fc7460\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x121fc7dd0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x121fc8610\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x121fc8e50\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x1405edc80\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x1405efdc0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x140306f30\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x1403073d0\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x140307aa0\n", + "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x140307f80\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x140308460\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x140308940\n", + "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x140308e20\n", + "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x140309300\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x1403097e0\n", + "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x140309cc0\n", + "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x14030a1a0\n", + "ggml_metal_init: loaded kernel_rope 0x14030a400\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x14030aa00\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x14030afd0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x14030b5a0\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x14030bb70\n", + "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", + "ggml_metal_init: hasUnifiedMemory = true\n", + "ggml_metal_init: maxTransferRate = built-in GPU\n", + "llama_new_context_with_model: compute buffer total size = 91.35 MB\n", + "llama_new_context_with_model: max tensor size = 87.89 MB\n", + "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.50 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, ( 6985.86 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7387.86 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7477.88 / 21845.34)\n", + "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n" + ] + } + ], "source": [ "n_gpu_layers = 40 # Change this value based on your model and your GPU VRAM pool.\n", "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n", "\n", "# Make sure the model path is correct for your system!\n", "llm = LlamaCpp(\n", - " model_path=\"./ggml-model-q4_0.bin\",\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", " n_gpu_layers=n_gpu_layers,\n", " n_batch=n_batch,\n", " callback_manager=callback_manager,\n", @@ -416,36 +496,20 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "llm_chain = LLMChain(prompt=prompt, llm=llm)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " We are looking for an NFL team that won the Super Bowl when Justin Bieber (born March 1, 1994) was born. 
\n", "\n", - "First, let's look up which year is closest to when Justin Bieber was born:\n", "\n", - "* The year before he was born: 1993\n", - "* The year of his birth: 1994\n", - "* The year after he was born: 1995\n", + "Justin Bieber was born on March 1, 1994. The Super Bowl is played at the end of the NFL season which runs from September to February.\n", "\n", - "We want to know what NFL team won the Super Bowl in the year that is closest to when Justin Bieber was born. Therefore, we should look up the NFL team that won the Super Bowl in either 1993 or 1994.\n", + "In 1994, the NFL season ended with Super Bowl XXVIII which was played on January 28th, 1994.\n", "\n", - "Now let's find out which NFL team did win the Super Bowl in either of those years:\n", - "\n", - "* In 1993, the San Francisco 49ers won the Super Bowl against the Dallas Cowboys by a score of 20-16.\n", - "* In 1994, the San Francisco 49ers won the Super Bowl again, this time against the San Diego Chargers by a score of 49-26.\n" + "So, there was no Super Bowl in the year Justin Bieber was born. The Super Bowl has only been around since 1967 and is played annually between the champions of the National Football Conference (NFC) and the American Football Conference (AFC)." ] }, { @@ -453,25 +517,27 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 238.10 ms\n", - "llama_print_timings: sample time = 84.23 ms / 256 runs ( 0.33 ms per token)\n", - "llama_print_timings: prompt eval time = 238.04 ms / 49 tokens ( 4.86 ms per token)\n", - "llama_print_timings: eval time = 10391.96 ms / 255 runs ( 40.75 ms per token)\n", - "llama_print_timings: total time = 15664.80 ms\n" + "llama_print_timings: load time = 427.90 ms\n", + "llama_print_timings: sample time = 98.36 ms / 133 runs ( 0.74 ms per token, 1352.18 tokens per second)\n", + "llama_print_timings: prompt eval time = 427.83 ms / 45 tokens ( 9.51 ms per token, 105.18 tokens per second)\n", + "llama_print_timings: eval time = 3687.12 ms / 132 runs ( 27.93 ms per token, 35.80 tokens per second)\n", + "llama_print_timings: total time = 4401.84 ms\n" ] }, { "data": { "text/plain": [ - "\" We are looking for an NFL team that won the Super Bowl when Justin Bieber (born March 1, 1994) was born. \\n\\nFirst, let's look up which year is closest to when Justin Bieber was born:\\n\\n* The year before he was born: 1993\\n* The year of his birth: 1994\\n* The year after he was born: 1995\\n\\nWe want to know what NFL team won the Super Bowl in the year that is closest to when Justin Bieber was born. Therefore, we should look up the NFL team that won the Super Bowl in either 1993 or 1994.\\n\\nNow let's find out which NFL team did win the Super Bowl in either of those years:\\n\\n* In 1993, the San Francisco 49ers won the Super Bowl against the Dallas Cowboys by a score of 20-16.\\n* In 1994, the San Francisco 49ers won the Super Bowl again, this time against the San Diego Chargers by a score of 49-26.\\n\"" + "'\\n\\nJustin Bieber was born on March 1, 1994. The Super Bowl is played at the end of the NFL season which runs from September to February.\\n\\nIn 1994, the NFL season ended with Super Bowl XXVIII which was played on January 28th, 1994.\\n\\nSo, there was no Super Bowl in the year Justin Bieber was born. 
The Super Bowl has only been around since 1967 and is played annually between the champions of the National Football Conference (NFC) and the American Football Conference (AFC).'" ] }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", + "\n", "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n", "\n", "llm_chain.run(question)" @@ -497,16 +563,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n", + "llama_model_load_internal: format = ggjt v3 (latest)\n", + "llama_model_load_internal: n_vocab = 32000\n", + "llama_model_load_internal: n_ctx = 512\n", + "llama_model_load_internal: n_embd = 5120\n", + "llama_model_load_internal: n_mult = 256\n", + "llama_model_load_internal: n_head = 40\n", + "llama_model_load_internal: n_head_kv = 40\n", + "llama_model_load_internal: n_layer = 40\n", + "llama_model_load_internal: n_rot = 128\n", + "llama_model_load_internal: n_gqa = 1\n", + "llama_model_load_internal: rnorm_eps = 5.0e-06\n", + "llama_model_load_internal: n_ff = 13824\n", + "llama_model_load_internal: freq_base = 10000.0\n", + "llama_model_load_internal: freq_scale = 1\n", + "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", + "llama_model_load_internal: model size = 13B\n", + "llama_model_load_internal: ggml ctx size = 0.11 MB\n", + "llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n", + "llama_new_context_with_model: kv self size = 400.00 MB\n", + "ggml_metal_init: allocating\n", + "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", + "ggml_metal_init: loaded kernel_add 0x113b42480\n", + "ggml_metal_init: loaded kernel_add_row 0x113b44210\n", + "ggml_metal_init: loaded kernel_mul 0x113b43a80\n", + "ggml_metal_init: loaded kernel_mul_row 0x113b44880\n", + "ggml_metal_init: loaded kernel_scale 0x113b45010\n", + "ggml_metal_init: loaded kernel_silu 0x113b45650\n", + "ggml_metal_init: loaded kernel_relu 0x113b427f0\n", + "ggml_metal_init: loaded kernel_gelu 0x113b46300\n", + "ggml_metal_init: loaded kernel_soft_max 0x113b46980\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x113b46e20\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x113b47860\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x113b48010\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x113b48880\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x113b48f70\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x113b49e00\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x113b4a530\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x113b4ac70\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x113b4b3b0\n", + "ggml_metal_init: loaded kernel_rms_norm 0x113b4bb00\n", + "ggml_metal_init: loaded kernel_norm 0x113b4c1a0\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x113b4cba0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x113b4d360\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x113b4dba0\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x113b4e560\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x113b4ed10\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x113b4f580\n", + 
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x113b4fdc0\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x113b50740\n", + "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x113b51250\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x113b51a80\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x113b522b0\n", + "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x113b52ae0\n", + "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x113b53310\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x113b53b40\n", + "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x113b54370\n", + "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x113b54ba0\n", + "ggml_metal_init: loaded kernel_rope 0x113b551a0\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x113b55b10\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x113b56450\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x113b56dc0\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x113b576b0\n", + "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", + "ggml_metal_init: hasUnifiedMemory = true\n", + "ggml_metal_init: maxTransferRate = built-in GPU\n", + "llama_new_context_with_model: compute buffer total size = 91.35 MB\n", + "llama_new_context_with_model: max tensor size = 87.89 MB\n", + "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.50 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, ( 6985.86 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7387.86 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7477.88 / 21845.34)AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", + "\n" + ] + } + ], "source": [ "n_gpu_layers = 1 # Metal set to 1 is enough.\n", "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n", "\n", "# Make sure the model path is correct for your system!\n", "llm = LlamaCpp(\n", - " model_path=\"./ggml-model-q4_0.bin\",\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", " n_gpu_layers=n_gpu_layers,\n", " n_batch=n_batch,\n", " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", @@ -531,6 +677,349 @@ "\n", "For the first call to the LLM, the performance may be slow due to the model compilation in Metal GPU." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Grammars\n", + "\n", + "\n", + "We can specify [grammars](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md) to constrain model outputs.\n", + "\n", + "Supply the path to the specifed `json.gbnf` file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n", + "llama_model_load_internal: format = ggjt v3 (latest)\n", + "llama_model_load_internal: n_vocab = 32000\n", + "llama_model_load_internal: n_ctx = 512\n", + "llama_model_load_internal: n_embd = 5120\n", + "llama_model_load_internal: n_mult = 256\n", + "llama_model_load_internal: n_head = 40\n", + "llama_model_load_internal: n_head_kv = 40\n", + "llama_model_load_internal: n_layer = 40\n", + "llama_model_load_internal: n_rot = 128\n", + "llama_model_load_internal: n_gqa = 1\n", + "llama_model_load_internal: rnorm_eps = 5.0e-06\n", + "llama_model_load_internal: n_ff = 13824\n", + "llama_model_load_internal: freq_base = 10000.0\n", + "llama_model_load_internal: freq_scale = 1\n", + "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", + "llama_model_load_internal: model size = 13B\n", + "llama_model_load_internal: ggml ctx size = 0.11 MB\n", + "llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n", + "llama_new_context_with_model: kv self size = 400.00 MB\n", + "ggml_metal_init: allocating\n", + "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", + "ggml_metal_init: loaded kernel_add 0x1516fb530\n", + "ggml_metal_init: loaded kernel_add_row 0x1516fb790\n", + "ggml_metal_init: loaded kernel_mul 0x1516fb9f0\n", + "ggml_metal_init: loaded kernel_mul_row 0x1516fbc50\n", + "ggml_metal_init: loaded kernel_scale 0x1516fbeb0\n", + "ggml_metal_init: loaded kernel_silu 0x1516fc110\n", + "ggml_metal_init: loaded kernel_relu 0x1516fc370\n", + "ggml_metal_init: loaded kernel_gelu 0x1516fc5d0\n", + "ggml_metal_init: loaded kernel_soft_max 0x1516fc830\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x1516fca90\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x1516fccf0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x1516fcf50\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x1516fd1b0\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x1516fd410\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x1516fd670\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x1516fd8d0\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x1516fdb30\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x1516fdd90\n", + "ggml_metal_init: loaded kernel_rms_norm 0x1516fdff0\n", + "ggml_metal_init: loaded kernel_norm 0x1516fe250\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x1516fe4b0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x1516fe710\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x1516fe970\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x1516febd0\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x1516fee30\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x1516ff090\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x1516ff2f0\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x1516ff550\n", + "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x1516ff7b0\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x121fce650\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x121fcdce0\n", + "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x121fceab0\n", + "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x121fced10\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x121fcef70\n", + 
"ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x121fcf1d0\n", + "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x121fcf430\n", + "ggml_metal_init: loaded kernel_rope 0x121fcf690\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x121fcf8f0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x121fcfb50\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x121fcfdb0\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x121fd0010\n", + "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", + "ggml_metal_init: hasUnifiedMemory = true\n", + "ggml_metal_init: maxTransferRate = built-in GPU\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root ::= object \n", + "object ::= [{] ws object_11 [}] \n", + "value ::= object | array | string | number | boolean | [n] [u] [l] [l] \n", + "array ::= [[] ws array_15 []] \n", + "string ::= [\"] string_18 [\"] ws \n", + "number ::= number_19 number_20 ws \n", + "boolean ::= boolean_21 ws \n", + "ws ::= ws_23 \n", + "object_8 ::= string [:] ws value object_10 \n", + "object_9 ::= [,] ws string [:] ws value \n", + "object_10 ::= object_9 object_10 | \n", + "object_11 ::= object_8 | \n", + "array_12 ::= value array_14 \n", + "array_13 ::= [,] ws value \n", + "array_14 ::= array_13 array_14 | \n", + "array_15 ::= array_12 | \n", + "string_16 ::= [^\"\\] | [\\] string_17 \n", + "string_17 ::= [\"\\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] \n", + "string_18 ::= string_16 string_18 | \n", + "number_19 ::= [-] | \n", + "number_20 ::= [0-9] number_20 | [0-9] \n", + "boolean_21 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] \n", + "ws_22 ::= [ ] ws \n", + "ws_23 ::= ws_22 | \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_new_context_with_model: compute buffer total size = 91.35 MB\n", + "llama_new_context_with_model: max tensor size = 87.89 MB\n", + "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, (14468.72 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, (14470.08 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, (14872.08 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, (14962.09 / 21845.34)\n", + "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", + "from_string grammar:\n", + "\n" + ] + } + ], + "source": [ + "n_gpu_layers = 1 \n", + "n_batch = 512\n", + "llm = LlamaCpp(\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", + " n_gpu_layers=n_gpu_layers,\n", + " n_batch=n_batch,\n", + " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", + " callback_manager=callback_manager,\n", + " verbose=True,\n", + " grammar_path=\"/Users/rlm/Desktop/Code/langchain-main/langchain/libs/langchain/langchain/llms/grammars/json.gbnf\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Error in LangChainTracer.on_llm_start callback: ctypes objects containing pointers cannot be pickled\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n", + " if self.grammar is not None:\n", + "AttributeError: 
'LlamaGrammar' object has no attribute 'grammar'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"name\": \"John Doe\", \"age\": 30, \"gender\": \"male\"}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 317.62 ms\n", + "llama_print_timings: sample time = 141.83 ms / 22 runs ( 6.45 ms per token, 155.11 tokens per second)\n", + "llama_print_timings: prompt eval time = 316.89 ms / 9 tokens ( 35.21 ms per token, 28.40 tokens per second)\n", + "llama_print_timings: eval time = 575.93 ms / 21 runs ( 27.43 ms per token, 36.46 tokens per second)\n", + "llama_print_timings: total time = 1087.31 ms\n", + "Error in LangChainTracer.on_llm_end callback: ctypes objects containing pointers cannot be pickled\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n", + " if self.grammar is not None:\n", + "AttributeError: 'LlamaGrammar' object has no attribute 'grammar'\n" + ] + } + ], + "source": [ + "result=llm(\"Describe a person in JSON format:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'John Doe'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eval(result)[\"name\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also try `list.gbnf`." + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama.cpp: loading model from /home/eryk/deepsense/llama-2-7b.ggmlv3.q4_0.bin\n", + "llama_model_load_internal: format = ggjt v3 (latest)\n", + "llama_model_load_internal: n_vocab = 32000\n", + "llama_model_load_internal: n_ctx = 512\n", + "llama_model_load_internal: n_embd = 4096\n", + "llama_model_load_internal: n_mult = 256\n", + "llama_model_load_internal: n_head = 32\n", + "llama_model_load_internal: n_head_kv = 32\n", + "llama_model_load_internal: n_layer = 32\n", + "llama_model_load_internal: n_rot = 128\n", + "llama_model_load_internal: n_gqa = 1\n", + "llama_model_load_internal: rnorm_eps = 5.0e-06\n", + "llama_model_load_internal: n_ff = 11008\n", + "llama_model_load_internal: freq_base = 10000.0\n", + "llama_model_load_internal: freq_scale = 1\n", + "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", + "llama_model_load_internal: model size = 7B\n", + "llama_model_load_internal: ggml ctx size = 0.08 MB\n", + "llama_model_load_internal: mem required = 3615.73 MB (+ 256.00 MB per state)\n", + "llama_new_context_with_model: kv self size = 256.00 MB\n", + "AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | \n", + "llama_new_context_with_model: compute buffer total size = 71.84 MB\n", + "from_string grammar:\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root ::= [[] items []] EOF \n", + "items ::= item items_7 \n", + "EOF ::= [] \n", + "item ::= string \n", + "items_4 ::= [,] items_6 item \n", + "ws ::= [ ] \n", + "items_6 ::= ws items_6 | \n", + "items_7 ::= items_4 items_7 | \n", + "string ::= [\"] word string_12 [\"] string_13 \n", + "word ::= word_14 \n", + "string_10 ::= string_11 word \n", + "string_11 ::= 
ws string_11 | ws \n", + "string_12 ::= string_10 string_12 | \n", + "string_13 ::= ws string_13 | \n", + "word_14 ::= [a-zA-Z] word_14 | [a-zA-Z] \n" + ] + } + ], + "source": [ + "n_gpu_layers = 1 \n", + "n_batch = 512\n", + "llm = LlamaCpp(\n", + " model_path=\"/home/eryk/deepsense/llama-2-7b.ggmlv3.q4_0.bin\",\n", + " n_gpu_layers=n_gpu_layers,\n", + " n_batch=n_batch,\n", + " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", + " callback_manager=callback_manager,\n", + " verbose=True,\n", + " grammar_path=\"/home/eryk/deepsense/langchain/libs/langchain/langchain/llms/grammars/list.gbnf\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\"Jane Eyre\" , \"Sense and Sensibility\" , \"A Tale of Two Cities\"]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 1079.21 ms\n", + "llama_print_timings: sample time = 225.57 ms / 29 runs ( 7.78 ms per token, 128.56 tokens per second)\n", + "llama_print_timings: prompt eval time = 1078.34 ms / 11 tokens ( 98.03 ms per token, 10.20 tokens per second)\n", + "llama_print_timings: eval time = 4389.99 ms / 28 runs ( 156.79 ms per token, 6.38 tokens per second)\n", + "llama_print_timings: total time = 5807.84 ms\n" + ] + } + ], + "source": [ + "result=llm(\"List of top-3 my favourite books:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Jane Eyre', 'Sense and Sensibility', 'A Tale of Two Cities']" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eval(result)" + ] } ], "metadata": { @@ -549,7 +1038,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/llms/grammars/json.gbnf b/libs/langchain/langchain/llms/grammars/json.gbnf new file mode 100644 index 00000000000..61bd2b2e65b --- /dev/null +++ b/libs/langchain/langchain/llms/grammars/json.gbnf @@ -0,0 +1,29 @@ +# Grammar for subset of JSON - doesn't support full string or number syntax + +root ::= object +value ::= object | array | string | number | boolean | "null" + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" + +string ::= + "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + )* "\"" ws + +# Only plain integers currently +number ::= "-"? [0-9]+ ws +boolean ::= ("true" | "false") ws + +# Optional space: by convention, applied in this grammar after literal chars when allowed +ws ::= ([ \t\n] ws)? 
\ No newline at end of file diff --git a/libs/langchain/langchain/llms/grammars/list.gbnf b/libs/langchain/langchain/llms/grammars/list.gbnf new file mode 100644 index 00000000000..30ea6e0c849 --- /dev/null +++ b/libs/langchain/langchain/llms/grammars/list.gbnf @@ -0,0 +1,14 @@ +root ::= "[" items "]" EOF + +items ::= item ("," ws* item)* + +item ::= string + +string ::= + "\"" word (ws+ word)* "\"" ws* + +word ::= [a-zA-Z]+ + +ws ::= " " + +EOF ::= "\n" \ No newline at end of file diff --git a/libs/langchain/langchain/llms/llamacpp.py b/libs/langchain/langchain/llms/llamacpp.py index d5af66dc4c9..0e4b7e8f635 100644 --- a/libs/langchain/langchain/llms/llamacpp.py +++ b/libs/langchain/langchain/llms/llamacpp.py @@ -1,5 +1,8 @@ +from __future__ import annotations + import logging -from typing import Any, Dict, Iterator, List, Optional +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union from langchain.callbacks.manager import CallbackManagerForLLMRun from langchain.llms.base import LLM @@ -8,6 +11,9 @@ from langchain.schema.output import GenerationChunk from langchain.utils import get_pydantic_field_names from langchain.utils.utils import build_extra_kwargs +if TYPE_CHECKING: + from llama_cpp import LlamaGrammar + logger = logging.getLogger(__name__) @@ -113,12 +119,35 @@ class LlamaCpp(LLM): streaming: bool = True """Whether to stream the results, token by token.""" + grammar_path: Optional[Union[str, Path]] = None + """ + grammar_path: Path to the .gbnf file that defines formal grammars + for constraining model outputs. For instance, the grammar can be used + to force the model to generate valid JSON or to speak exclusively in emojis. At most + one of grammar_path and grammar should be passed in. + """ + grammar: Optional[Union[str, LlamaGrammar]] = None + """ + grammar: formal grammar for constraining model outputs. For instance, the grammar + can be used to force the model to generate valid JSON or to speak exclusively in + emojis. At most one of grammar_path and grammar should be passed in. + """ + verbose: bool = True """Print verbose output to stderr.""" @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that llama-cpp-python library is installed.""" + try: + from llama_cpp import Llama, LlamaGrammar + except ImportError: + raise ImportError( + "Could not import llama-cpp-python library. " + "Please install the llama-cpp-python library to " + "use this embedding model: pip install llama-cpp-python" + ) + model_path = values["model_path"] model_param_names = [ "rope_freq_scale", @@ -146,21 +175,26 @@ class LlamaCpp(LLM): model_params.update(values["model_kwargs"]) try: - from llama_cpp import Llama - values["client"] = Llama(model_path, **model_params) - except ImportError: - raise ImportError( - "Could not import llama-cpp-python library. " - "Please install the llama-cpp-python library to " - "use this embedding model: pip install llama-cpp-python" - ) except Exception as e: raise ValueError( f"Could not load Llama model from path: {model_path}. " f"Received error {e}" ) + if values["grammar"] and values["grammar_path"]: + grammar = values["grammar"] + grammar_path = values["grammar_path"] + raise ValueError( + "Can only pass in one of grammar and grammar_path. Received " + f"{grammar=} and {grammar_path=}." 
+ ) + elif isinstance(values["grammar"], str): + values["grammar"] = LlamaGrammar.from_string(values["grammar"]) + elif values["grammar_path"]: + values["grammar"] = LlamaGrammar.from_file(values["grammar_path"]) + else: + pass return values @root_validator(pre=True) @@ -176,7 +210,7 @@ class LlamaCpp(LLM): @property def _default_params(self) -> Dict[str, Any]: """Get the default parameters for calling llama_cpp.""" - return { + params = { "suffix": self.suffix, "max_tokens": self.max_tokens, "temperature": self.temperature, @@ -187,6 +221,9 @@ class LlamaCpp(LLM): "repeat_penalty": self.repeat_penalty, "top_k": self.top_k, } + if self.grammar: + params["grammar"] = self.grammar + return params @property def _identifying_params(self) -> Dict[str, Any]: @@ -252,7 +289,10 @@ class LlamaCpp(LLM): # and return the combined strings from the first choices's text: combined_text_output = "" for chunk in self._stream( - prompt=prompt, stop=stop, run_manager=run_manager, **kwargs + prompt=prompt, + stop=stop, + run_manager=run_manager, + **kwargs, ): combined_text_output += chunk.text return combined_text_output From eb3d1fa93caa26d497e5b5bdf6134d266f6a6990 Mon Sep 17 00:00:00 2001 From: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> Date: Mon, 28 Aug 2023 13:53:27 -0400 Subject: [PATCH 06/19] Add security warning to experimental `SQLDatabaseChain` class. (#9867) The most reliable way to not have a chain run an undesirable SQL command is to not give it database permissions to run that command. That way the database itself performs the rule enforcement, so it's much easier to configure and use properly than anything we could add in ourselves. --- .../langchain_experimental/sql/base.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/libs/experimental/langchain_experimental/sql/base.py b/libs/experimental/langchain_experimental/sql/base.py index fcc664a75d0..5773e455615 100644 --- a/libs/experimental/langchain_experimental/sql/base.py +++ b/libs/experimental/langchain_experimental/sql/base.py @@ -29,6 +29,15 @@ class SQLDatabaseChain(Chain): from langchain import OpenAI, SQLDatabase db = SQLDatabase(...) db_chain = SQLDatabaseChain.from_llm(OpenAI(), db) + + *Security note*: Make sure that the database connection uses credentials + that are narrowly-scoped to only include the permissions this chain needs. + Failure to do so may result in data corruption or loss, since this chain may + attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this chain. + This issue shows an example negative outcome if these steps are not taken: + https://github.com/langchain-ai/langchain/issues/5923 """ llm_chain: LLMChain @@ -49,7 +58,7 @@ class SQLDatabaseChain(Chain): return_direct: bool = False """Whether or not to return the result of querying the SQL table directly.""" use_query_checker: bool = False - """Whether or not the query checker tool should be used to attempt + """Whether or not the query checker tool should be used to attempt to fix the initial SQL from the LLM.""" query_checker_prompt: Optional[BasePromptTemplate] = None """The prompt template that should be used by the query checker""" @@ -197,6 +206,17 @@ class SQLDatabaseChain(Chain): prompt: Optional[BasePromptTemplate] = None, **kwargs: Any, ) -> SQLDatabaseChain: + """Create a SQLDatabaseChain from an LLM and a database connection. 
+ + *Security note*: Make sure that the database connection uses credentials + that are narrowly-scoped to only include the permissions this chain needs. + Failure to do so may result in data corruption or loss, since this chain may + attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this chain. + This issue shows an example negative outcome if these steps are not taken: + https://github.com/langchain-ai/langchain/issues/5923 + """ prompt = prompt or SQL_PROMPTS.get(db.dialect, PROMPT) llm_chain = LLMChain(llm=llm, prompt=prompt) return cls(llm_chain=llm_chain, database=db, **kwargs) From 8393ba9dab6eecb421061048fb4843c91cc5b6f5 Mon Sep 17 00:00:00 2001 From: Lance Martin <122662504+rlancemartin@users.noreply.github.com> Date: Mon, 28 Aug 2023 12:56:46 -0700 Subject: [PATCH 07/19] Add instructions for GGUF (#9874) llama.cpp migrated to GGUF model format, and new releases (e.g., [here](https://huggingface.co/TheBloke)) now use GGUF. --- docs/extras/integrations/llms/llamacpp.ipynb | 531 +++---------------- 1 file changed, 87 insertions(+), 444 deletions(-) diff --git a/docs/extras/integrations/llms/llamacpp.ipynb b/docs/extras/integrations/llms/llamacpp.ipynb index b40001d8e02..71ba60db70c 100644 --- a/docs/extras/integrations/llms/llamacpp.ipynb +++ b/docs/extras/integrations/llms/llamacpp.ipynb @@ -7,9 +7,20 @@ "# Llama.cpp\n", "\n", "[llama-cpp-python](https://github.com/abetlen/llama-cpp-python) is a Python binding for [llama.cpp](https://github.com/ggerganov/llama.cpp). \n", - "It supports [several LLMs](https://github.com/ggerganov/llama.cpp).\n", "\n", - "This notebook goes over how to run `llama-cpp-python` within LangChain." 
+ "It supports inference for [many LLMs](https://github.com/ggerganov/llama.cpp), which can be accessed on [HuggingFace](https://huggingface.co/TheBloke).\n", + "\n", + "This notebook goes over how to run `llama-cpp-python` within LangChain.\n", + "\n", + "**Note: new versions of `llama-cpp-python` use GGUF model files (see [here](https://github.com/abetlen/llama-cpp-python/pull/633)).**\n", + "\n", + "This is a breaking change.\n", + " \n", + "To convert existing GGML models to GGUF you can run the following in [llama.cpp](https://github.com/ggerganov/llama.cpp):\n", + "\n", + "```\n", + "python ./convert-llama-ggmlv3-to-gguf.py --eps 1e-5 --input models/openorca-platypus2-13b.ggmlv3.q4_0.bin --output models/openorca-platypus2-13b.gguf.q4_0.bin\n", + "```" ] }, { @@ -19,7 +30,7 @@ "## Installation\n", "\n", "There are different options on how to install the llama-cpp package: \n", - "- only CPU usage\n", + "- CPU usage\n", "- CPU + GPU (using one of many BLAS backends)\n", "- Metal GPU (MacOS with Apple Silicon Chip) \n", "\n", @@ -171,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { "tags": [] }, @@ -207,15 +218,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": { "tags": [] }, "outputs": [], "source": [ "# Callbacks support token-wise streaming\n", - "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])\n", - "# Verbose is required to pass to the callback manager" + "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])" ] }, { @@ -240,12 +250,12 @@ "source": [ "# Make sure the model path is correct for your system!\n", "llm = LlamaCpp(\n", - " model_path=\"/Users/rlm/Desktop/Code/llama/llama-2-7b-ggml/llama-2-7b-chat.ggmlv3.q4_0.bin\",\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n", " temperature=0.75,\n", " max_tokens=2000,\n", " top_p=1,\n", - " callback_manager=callback_manager,\n", - " verbose=True,\n", + " callback_manager=callback_manager, \n", + " verbose=True, # Verbose is required to pass to the callback manager\n", ")" ] }, @@ -375,7 +385,6 @@ ], "source": [ "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n", - "\n", "llm_chain.run(question)" ] }, @@ -397,100 +406,20 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n", - "llama_model_load_internal: format = ggjt v3 (latest)\n", - "llama_model_load_internal: n_vocab = 32000\n", - "llama_model_load_internal: n_ctx = 512\n", - "llama_model_load_internal: n_embd = 5120\n", - "llama_model_load_internal: n_mult = 256\n", - "llama_model_load_internal: n_head = 40\n", - "llama_model_load_internal: n_head_kv = 40\n", - "llama_model_load_internal: n_layer = 40\n", - "llama_model_load_internal: n_rot = 128\n", - "llama_model_load_internal: n_gqa = 1\n", - "llama_model_load_internal: rnorm_eps = 5.0e-06\n", - "llama_model_load_internal: n_ff = 13824\n", - "llama_model_load_internal: freq_base = 10000.0\n", - "llama_model_load_internal: freq_scale = 1\n", - "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", - "llama_model_load_internal: model size = 13B\n", - "llama_model_load_internal: ggml ctx size = 0.11 MB\n", - "llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per 
state)\n", - "llama_new_context_with_model: kv self size = 400.00 MB\n", - "ggml_metal_init: allocating\n", - "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x1405ed6b0\n", - "ggml_metal_init: loaded kernel_add_row 0x1405eee00\n", - "ggml_metal_init: loaded kernel_mul 0x1405ee650\n", - "ggml_metal_init: loaded kernel_mul_row 0x1405eda20\n", - "ggml_metal_init: loaded kernel_scale 0x121fc1d80\n", - "ggml_metal_init: loaded kernel_silu 0x121fc1fe0\n", - "ggml_metal_init: loaded kernel_relu 0x121fc2240\n", - "ggml_metal_init: loaded kernel_gelu 0x121fc24e0\n", - "ggml_metal_init: loaded kernel_soft_max 0x121fc2950\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x121fc2d60\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x121fc3160\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x121fc3a20\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x121fc4170\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x121fc4890\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x121fc5010\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x121fc5750\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x121fc5e90\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x121fc65d0\n", - "ggml_metal_init: loaded kernel_rms_norm 0x121fc6d20\n", - "ggml_metal_init: loaded kernel_norm 0x121fc7460\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x121fc7dd0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x121fc8610\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x121fc8e50\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x1405edc80\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x1405efdc0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x140306f30\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x1403073d0\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x140307aa0\n", - "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x140307f80\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x140308460\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x140308940\n", - "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x140308e20\n", - "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x140309300\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x1403097e0\n", - "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x140309cc0\n", - "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x14030a1a0\n", - "ggml_metal_init: loaded kernel_rope 0x14030a400\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x14030aa00\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x14030afd0\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x14030b5a0\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x14030bb70\n", - "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", - "ggml_metal_init: hasUnifiedMemory = true\n", - "ggml_metal_init: maxTransferRate = built-in GPU\n", - "llama_new_context_with_model: compute buffer total size = 91.35 MB\n", - "llama_new_context_with_model: max tensor size = 87.89 MB\n", - "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.50 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, ( 6985.86 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7387.86 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7477.88 / 21845.34)\n", - "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 
0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n" - ] - } - ], + "outputs": [], "source": [ "n_gpu_layers = 40 # Change this value based on your model and your GPU VRAM pool.\n", "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n", "\n", "# Make sure the model path is correct for your system!\n", "llm = LlamaCpp(\n", - " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n", " n_gpu_layers=n_gpu_layers,\n", " n_batch=n_batch,\n", " callback_manager=callback_manager,\n", - " verbose=True,\n", + " verbose=True, # Verbose is required to pass to the callback manager\n", ")" ] }, @@ -505,11 +434,13 @@ "text": [ "\n", "\n", - "Justin Bieber was born on March 1, 1994. The Super Bowl is played at the end of the NFL season which runs from September to February.\n", + "1. Identify Justin Bieber's birth date: Justin Bieber was born on March 1, 1994.\n", "\n", - "In 1994, the NFL season ended with Super Bowl XXVIII which was played on January 28th, 1994.\n", + "2. Find the Super Bowl winner of that year: The NFL season of 1993 with the Super Bowl being played in January or of 1994.\n", "\n", - "So, there was no Super Bowl in the year Justin Bieber was born. The Super Bowl has only been around since 1967 and is played annually between the champions of the National Football Conference (NFC) and the American Football Conference (AFC)." + "3. Determine which team won the game: The Dallas Cowboys faced the Buffalo Bills in Super Bowl XXVII on January 31, 1993 (as the year is mis-labelled due to a error). The Dallas Cowboys won this matchup.\n", + "\n", + "So, Justin Bieber was born when the Dallas Cowboys were the reigning NFL Super Bowl." ] }, { @@ -517,17 +448,17 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 427.90 ms\n", - "llama_print_timings: sample time = 98.36 ms / 133 runs ( 0.74 ms per token, 1352.18 tokens per second)\n", - "llama_print_timings: prompt eval time = 427.83 ms / 45 tokens ( 9.51 ms per token, 105.18 tokens per second)\n", - "llama_print_timings: eval time = 3687.12 ms / 132 runs ( 27.93 ms per token, 35.80 tokens per second)\n", - "llama_print_timings: total time = 4401.84 ms\n" + "llama_print_timings: load time = 427.63 ms\n", + "llama_print_timings: sample time = 115.85 ms / 164 runs ( 0.71 ms per token, 1415.67 tokens per second)\n", + "llama_print_timings: prompt eval time = 427.53 ms / 45 tokens ( 9.50 ms per token, 105.26 tokens per second)\n", + "llama_print_timings: eval time = 4526.53 ms / 163 runs ( 27.77 ms per token, 36.01 tokens per second)\n", + "llama_print_timings: total time = 5293.77 ms\n" ] }, { "data": { "text/plain": [ - "'\\n\\nJustin Bieber was born on March 1, 1994. The Super Bowl is played at the end of the NFL season which runs from September to February.\\n\\nIn 1994, the NFL season ended with Super Bowl XXVIII which was played on January 28th, 1994.\\n\\nSo, there was no Super Bowl in the year Justin Bieber was born. The Super Bowl has only been around since 1967 and is played annually between the champions of the National Football Conference (NFC) and the American Football Conference (AFC).'" + "\"\\n\\n1. Identify Justin Bieber's birth date: Justin Bieber was born on March 1, 1994.\\n\\n2. 
Find the Super Bowl winner of that year: The NFL season of 1993 with the Super Bowl being played in January or of 1994.\\n\\n3. Determine which team won the game: The Dallas Cowboys faced the Buffalo Bills in Super Bowl XXVII on January 31, 1993 (as the year is mis-labelled due to a error). The Dallas Cowboys won this matchup.\\n\\nSo, Justin Bieber was born when the Dallas Cowboys were the reigning NFL Super Bowl.\"" ] }, "execution_count": 5, @@ -537,9 +468,7 @@ ], "source": [ "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "\n", "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n", - "\n", "llm_chain.run(question)" ] }, @@ -563,101 +492,20 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n", - "llama_model_load_internal: format = ggjt v3 (latest)\n", - "llama_model_load_internal: n_vocab = 32000\n", - "llama_model_load_internal: n_ctx = 512\n", - "llama_model_load_internal: n_embd = 5120\n", - "llama_model_load_internal: n_mult = 256\n", - "llama_model_load_internal: n_head = 40\n", - "llama_model_load_internal: n_head_kv = 40\n", - "llama_model_load_internal: n_layer = 40\n", - "llama_model_load_internal: n_rot = 128\n", - "llama_model_load_internal: n_gqa = 1\n", - "llama_model_load_internal: rnorm_eps = 5.0e-06\n", - "llama_model_load_internal: n_ff = 13824\n", - "llama_model_load_internal: freq_base = 10000.0\n", - "llama_model_load_internal: freq_scale = 1\n", - "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", - "llama_model_load_internal: model size = 13B\n", - "llama_model_load_internal: ggml ctx size = 0.11 MB\n", - "llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n", - "llama_new_context_with_model: kv self size = 400.00 MB\n", - "ggml_metal_init: allocating\n", - "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x113b42480\n", - "ggml_metal_init: loaded kernel_add_row 0x113b44210\n", - "ggml_metal_init: loaded kernel_mul 0x113b43a80\n", - "ggml_metal_init: loaded kernel_mul_row 0x113b44880\n", - "ggml_metal_init: loaded kernel_scale 0x113b45010\n", - "ggml_metal_init: loaded kernel_silu 0x113b45650\n", - "ggml_metal_init: loaded kernel_relu 0x113b427f0\n", - "ggml_metal_init: loaded kernel_gelu 0x113b46300\n", - "ggml_metal_init: loaded kernel_soft_max 0x113b46980\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x113b46e20\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x113b47860\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x113b48010\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x113b48880\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x113b48f70\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x113b49e00\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x113b4a530\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x113b4ac70\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x113b4b3b0\n", - "ggml_metal_init: loaded kernel_rms_norm 0x113b4bb00\n", - "ggml_metal_init: loaded kernel_norm 0x113b4c1a0\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x113b4cba0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x113b4d360\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x113b4dba0\n", - "ggml_metal_init: loaded 
kernel_mul_mat_q2_K_f32 0x113b4e560\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x113b4ed10\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x113b4f580\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x113b4fdc0\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x113b50740\n", - "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x113b51250\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x113b51a80\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x113b522b0\n", - "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x113b52ae0\n", - "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x113b53310\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x113b53b40\n", - "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x113b54370\n", - "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x113b54ba0\n", - "ggml_metal_init: loaded kernel_rope 0x113b551a0\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x113b55b10\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x113b56450\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x113b56dc0\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x113b576b0\n", - "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", - "ggml_metal_init: hasUnifiedMemory = true\n", - "ggml_metal_init: maxTransferRate = built-in GPU\n", - "llama_new_context_with_model: compute buffer total size = 91.35 MB\n", - "llama_new_context_with_model: max tensor size = 87.89 MB\n", - "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.50 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, ( 6985.86 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7387.86 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7477.88 / 21845.34)AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", - "\n" - ] - } - ], + "outputs": [], "source": [ "n_gpu_layers = 1 # Metal set to 1 is enough.\n", "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n", - "\n", "# Make sure the model path is correct for your system!\n", "llm = LlamaCpp(\n", - " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n", " n_gpu_layers=n_gpu_layers,\n", " n_batch=n_batch,\n", " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", " callback_manager=callback_manager,\n", - " verbose=True,\n", + " verbose=True, # Verbose is required to pass to the callback manager\n", ")" ] }, @@ -687,142 +535,27 @@ "\n", "We can specify [grammars](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md) to constrain model outputs.\n", "\n", - "Supply the path to the specifed `json.gbnf` file." + "This will sample tokens according to the grammar.\n", + " \n", + "For example, supply the path to the specifed `json.gbnf` file in order to produce JSON." 
] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n", - "llama_model_load_internal: format = ggjt v3 (latest)\n", - "llama_model_load_internal: n_vocab = 32000\n", - "llama_model_load_internal: n_ctx = 512\n", - "llama_model_load_internal: n_embd = 5120\n", - "llama_model_load_internal: n_mult = 256\n", - "llama_model_load_internal: n_head = 40\n", - "llama_model_load_internal: n_head_kv = 40\n", - "llama_model_load_internal: n_layer = 40\n", - "llama_model_load_internal: n_rot = 128\n", - "llama_model_load_internal: n_gqa = 1\n", - "llama_model_load_internal: rnorm_eps = 5.0e-06\n", - "llama_model_load_internal: n_ff = 13824\n", - "llama_model_load_internal: freq_base = 10000.0\n", - "llama_model_load_internal: freq_scale = 1\n", - "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", - "llama_model_load_internal: model size = 13B\n", - "llama_model_load_internal: ggml ctx size = 0.11 MB\n", - "llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n", - "llama_new_context_with_model: kv self size = 400.00 MB\n", - "ggml_metal_init: allocating\n", - "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x1516fb530\n", - "ggml_metal_init: loaded kernel_add_row 0x1516fb790\n", - "ggml_metal_init: loaded kernel_mul 0x1516fb9f0\n", - "ggml_metal_init: loaded kernel_mul_row 0x1516fbc50\n", - "ggml_metal_init: loaded kernel_scale 0x1516fbeb0\n", - "ggml_metal_init: loaded kernel_silu 0x1516fc110\n", - "ggml_metal_init: loaded kernel_relu 0x1516fc370\n", - "ggml_metal_init: loaded kernel_gelu 0x1516fc5d0\n", - "ggml_metal_init: loaded kernel_soft_max 0x1516fc830\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x1516fca90\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x1516fccf0\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x1516fcf50\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x1516fd1b0\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x1516fd410\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x1516fd670\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x1516fd8d0\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x1516fdb30\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x1516fdd90\n", - "ggml_metal_init: loaded kernel_rms_norm 0x1516fdff0\n", - "ggml_metal_init: loaded kernel_norm 0x1516fe250\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x1516fe4b0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x1516fe710\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x1516fe970\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x1516febd0\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x1516fee30\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x1516ff090\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x1516ff2f0\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x1516ff550\n", - "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x1516ff7b0\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x121fce650\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x121fcdce0\n", - "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x121fceab0\n", - "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x121fced10\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 
0x121fcef70\n", - "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x121fcf1d0\n", - "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x121fcf430\n", - "ggml_metal_init: loaded kernel_rope 0x121fcf690\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x121fcf8f0\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x121fcfb50\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x121fcfdb0\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x121fd0010\n", - "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", - "ggml_metal_init: hasUnifiedMemory = true\n", - "ggml_metal_init: maxTransferRate = built-in GPU\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "root ::= object \n", - "object ::= [{] ws object_11 [}] \n", - "value ::= object | array | string | number | boolean | [n] [u] [l] [l] \n", - "array ::= [[] ws array_15 []] \n", - "string ::= [\"] string_18 [\"] ws \n", - "number ::= number_19 number_20 ws \n", - "boolean ::= boolean_21 ws \n", - "ws ::= ws_23 \n", - "object_8 ::= string [:] ws value object_10 \n", - "object_9 ::= [,] ws string [:] ws value \n", - "object_10 ::= object_9 object_10 | \n", - "object_11 ::= object_8 | \n", - "array_12 ::= value array_14 \n", - "array_13 ::= [,] ws value \n", - "array_14 ::= array_13 array_14 | \n", - "array_15 ::= array_12 | \n", - "string_16 ::= [^\"\\] | [\\] string_17 \n", - "string_17 ::= [\"\\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] \n", - "string_18 ::= string_16 string_18 | \n", - "number_19 ::= [-] | \n", - "number_20 ::= [0-9] number_20 | [0-9] \n", - "boolean_21 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] \n", - "ws_22 ::= [ ] ws \n", - "ws_23 ::= ws_22 | \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama_new_context_with_model: compute buffer total size = 91.35 MB\n", - "llama_new_context_with_model: max tensor size = 87.89 MB\n", - "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, (14468.72 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, (14470.08 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, (14872.08 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, (14962.09 / 21845.34)\n", - "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", - "from_string grammar:\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "n_gpu_layers = 1 \n", - "n_batch = 512\n", + "n_gpu_layers = 1 # Metal set to 1 is enough.\n", + "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n", + "# Make sure the model path is correct for your system!\n", "llm = LlamaCpp(\n", - " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n", " n_gpu_layers=n_gpu_layers,\n", " n_batch=n_batch,\n", " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", " callback_manager=callback_manager,\n", - " verbose=True,\n", + " verbose=True, # Verbose is required to pass to the callback manager\n", " grammar_path=\"/Users/rlm/Desktop/Code/langchain-main/langchain/libs/langchain/langchain/llms/grammars/json.gbnf\",\n", ")" ] @@ -832,23 +565,29 @@ "execution_count": 7, "metadata": {}, "outputs": [ - { - "name": "stderr", - 
"output_type": "stream", - "text": [ - "Error in LangChainTracer.on_llm_start callback: ctypes objects containing pointers cannot be pickled\n", - "Exception ignored in: \n", - "Traceback (most recent call last):\n", - " File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n", - " if self.grammar is not None:\n", - "AttributeError: 'LlamaGrammar' object has no attribute 'grammar'\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "{\"name\": \"John Doe\", \"age\": 30, \"gender\": \"male\"}" + "{\n", + " \"name\": \"John Doe\",\n", + " \"age\": 34,\n", + " \"\": {\n", + " \"title\": \"Software Developer\",\n", + " \"company\": \"Google\"\n", + " },\n", + " \"interests\": [\n", + " \"Sports\",\n", + " \"Music\",\n", + " \"Cooking\"\n", + " ],\n", + " \"address\": {\n", + " \"street_number\": 123,\n", + " \"street_name\": \"Oak Street\",\n", + " \"city\": \"Mountain View\",\n", + " \"state\": \"California\",\n", + " \"postal_code\": 94040\n", + " }}" ] }, { @@ -856,132 +595,55 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 317.62 ms\n", - "llama_print_timings: sample time = 141.83 ms / 22 runs ( 6.45 ms per token, 155.11 tokens per second)\n", - "llama_print_timings: prompt eval time = 316.89 ms / 9 tokens ( 35.21 ms per token, 28.40 tokens per second)\n", - "llama_print_timings: eval time = 575.93 ms / 21 runs ( 27.43 ms per token, 36.46 tokens per second)\n", - "llama_print_timings: total time = 1087.31 ms\n", - "Error in LangChainTracer.on_llm_end callback: ctypes objects containing pointers cannot be pickled\n", - "Exception ignored in: \n", - "Traceback (most recent call last):\n", - " File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n", - " if self.grammar is not None:\n", - "AttributeError: 'LlamaGrammar' object has no attribute 'grammar'\n" + "llama_print_timings: load time = 357.51 ms\n", + "llama_print_timings: sample time = 1213.30 ms / 144 runs ( 8.43 ms per token, 118.68 tokens per second)\n", + "llama_print_timings: prompt eval time = 356.78 ms / 9 tokens ( 39.64 ms per token, 25.23 tokens per second)\n", + "llama_print_timings: eval time = 3947.16 ms / 143 runs ( 27.60 ms per token, 36.23 tokens per second)\n", + "llama_print_timings: total time = 5846.21 ms\n" ] } ], "source": [ + "%%capture captured --no-stdout\n", "result=llm(\"Describe a person in JSON format:\")" ] }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'John Doe'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "eval(result)[\"name\"]" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can also try `list.gbnf`." + "We can also supply `list.gbnf` to return a list." 
] }, { "cell_type": "code", - "execution_count": 83, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama.cpp: loading model from /home/eryk/deepsense/llama-2-7b.ggmlv3.q4_0.bin\n", - "llama_model_load_internal: format = ggjt v3 (latest)\n", - "llama_model_load_internal: n_vocab = 32000\n", - "llama_model_load_internal: n_ctx = 512\n", - "llama_model_load_internal: n_embd = 4096\n", - "llama_model_load_internal: n_mult = 256\n", - "llama_model_load_internal: n_head = 32\n", - "llama_model_load_internal: n_head_kv = 32\n", - "llama_model_load_internal: n_layer = 32\n", - "llama_model_load_internal: n_rot = 128\n", - "llama_model_load_internal: n_gqa = 1\n", - "llama_model_load_internal: rnorm_eps = 5.0e-06\n", - "llama_model_load_internal: n_ff = 11008\n", - "llama_model_load_internal: freq_base = 10000.0\n", - "llama_model_load_internal: freq_scale = 1\n", - "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", - "llama_model_load_internal: model size = 7B\n", - "llama_model_load_internal: ggml ctx size = 0.08 MB\n", - "llama_model_load_internal: mem required = 3615.73 MB (+ 256.00 MB per state)\n", - "llama_new_context_with_model: kv self size = 256.00 MB\n", - "AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | \n", - "llama_new_context_with_model: compute buffer total size = 71.84 MB\n", - "from_string grammar:\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "root ::= [[] items []] EOF \n", - "items ::= item items_7 \n", - "EOF ::= [] \n", - "item ::= string \n", - "items_4 ::= [,] items_6 item \n", - "ws ::= [ ] \n", - "items_6 ::= ws items_6 | \n", - "items_7 ::= items_4 items_7 | \n", - "string ::= [\"] word string_12 [\"] string_13 \n", - "word ::= word_14 \n", - "string_10 ::= string_11 word \n", - "string_11 ::= ws string_11 | ws \n", - "string_12 ::= string_10 string_12 | \n", - "string_13 ::= ws string_13 | \n", - "word_14 ::= [a-zA-Z] word_14 | [a-zA-Z] \n" - ] - } - ], + "outputs": [], "source": [ "n_gpu_layers = 1 \n", "n_batch = 512\n", "llm = LlamaCpp(\n", - " model_path=\"/home/eryk/deepsense/llama-2-7b.ggmlv3.q4_0.bin\",\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n", " n_gpu_layers=n_gpu_layers,\n", " n_batch=n_batch,\n", " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", " callback_manager=callback_manager,\n", " verbose=True,\n", - " grammar_path=\"/home/eryk/deepsense/langchain/libs/langchain/langchain/llms/grammars/list.gbnf\",\n", + " grammar_path=\"/Users/rlm/Desktop/Code/langchain-main/langchain/libs/langchain/langchain/llms/grammars/list.gbnf\",\n", ")" ] }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[\"Jane Eyre\" , \"Sense and Sensibility\" , \"A Tale of Two Cities\"]\n" + "[\"The Catcher in the Rye\", \"Wuthering Heights\", \"Anna Karenina\"]\n" ] }, { @@ -989,37 +651,18 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 1079.21 ms\n", - "llama_print_timings: sample time = 225.57 ms / 29 runs ( 7.78 ms per token, 128.56 tokens per second)\n", - "llama_print_timings: prompt eval time = 1078.34 ms / 11 tokens ( 98.03 ms per token, 10.20 tokens per second)\n", - 
"llama_print_timings: eval time = 4389.99 ms / 28 runs ( 156.79 ms per token, 6.38 tokens per second)\n", - "llama_print_timings: total time = 5807.84 ms\n" + "llama_print_timings: load time = 322.34 ms\n", + "llama_print_timings: sample time = 232.60 ms / 26 runs ( 8.95 ms per token, 111.78 tokens per second)\n", + "llama_print_timings: prompt eval time = 321.90 ms / 11 tokens ( 29.26 ms per token, 34.17 tokens per second)\n", + "llama_print_timings: eval time = 680.82 ms / 25 runs ( 27.23 ms per token, 36.72 tokens per second)\n", + "llama_print_timings: total time = 1295.27 ms\n" ] } ], "source": [ + "%%capture captured --no-stdout\n", "result=llm(\"List of top-3 my favourite books:\")" ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Jane Eyre', 'Sense and Sensibility', 'A Tale of Two Cities']" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "eval(result)" - ] } ], "metadata": { From b14d74dd4d317c492fc6fac5bf18fccb869c6683 Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Mon, 28 Aug 2023 13:43:59 -0700 Subject: [PATCH 08/19] iMessage loader (#9832) Add an iMessage chat loader --- .../integrations/chat_loaders/imessage.ipynb | 420 ++++++++++++++++++ .../langchain/chat_loaders/imessage.py | 117 +++++ 2 files changed, 537 insertions(+) create mode 100644 docs/extras/integrations/chat_loaders/imessage.ipynb create mode 100644 libs/langchain/langchain/chat_loaders/imessage.py diff --git a/docs/extras/integrations/chat_loaders/imessage.ipynb b/docs/extras/integrations/chat_loaders/imessage.ipynb new file mode 100644 index 00000000000..58f502e53b0 --- /dev/null +++ b/docs/extras/integrations/chat_loaders/imessage.ipynb @@ -0,0 +1,420 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "01fcfa2f-33a9-48f3-835a-b1956c394d6b", + "metadata": {}, + "source": [ + "# iMessage\n", + "\n", + "This notebook shows how to use the iMessage chat loader. This class helps convert iMessage conversations to LangChain chat messages.\n", + "\n", + "On MacOS, iMessage stores conversations in a sqlite database at `~/Library/Messages/chat.db` (at least for macOS Ventura 13.4). \n", + "The `IMessageChatLoader` loads from this database file. \n", + "\n", + "1. Create the `IMessageChatLoader` with the file path pointed to `chat.db` database you'd like to process.\n", + "2. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion. Optionally use `merge_chat_runs` to combine message from the same sender in sequence, and/or `map_ai_messages` to convert messages from the specified sender to the \"AIMessage\" class.\n", + "\n", + "## 1. Access Chat DB\n", + "\n", + "It's likely that your terminal is denied access to `~/Library/Messages`. To use this class, you can copy the DB to an accessible directory (e.g., Documents) and load from there. Alternatively (and not recommended), you can grant full disk access for your terminal emulator in System Settings > Securityand Privacy > Full Disk Access.\n", + "\n", + "We have created an example database you can use at [this linked drive file](https://drive.google.com/file/d/1NebNKqTA2NXApCmeH6mu0unJD2tANZzo/view?usp=sharing)." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "036ce7e0-a38f-4cbe-89a6-a205ae7c23be",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "File chat.db downloaded.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# This uses some example data\n",
+    "import requests\n",
+    "\n",
+    "def download_drive_file(url: str, output_path: str = 'chat.db') -> None:\n",
+    "    file_id = url.split('/')[-2]\n",
+    "    download_url = f'https://drive.google.com/uc?export=download&id={file_id}'\n",
+    "\n",
+    "    response = requests.get(download_url)\n",
+    "    if response.status_code != 200:\n",
+    "        print('Failed to download the file.')\n",
+    "        return\n",
+    "\n",
+    "    with open(output_path, 'wb') as file:\n",
+    "        file.write(response.content)\n",
+    "    print(f'File {output_path} downloaded.')\n",
+    "\n",
+    "url = 'https://drive.google.com/file/d/1NebNKqTA2NXApCmeH6mu0unJD2tANZzo/view?usp=sharing'\n",
+    "\n",
+    "# Download file to chat.db\n",
+    "download_drive_file(url)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cf60f703-76f1-4602-a723-02c59535c1af",
+   "metadata": {},
+   "source": [
+    "## 2. Create the Chat Loader\n",
+    "\n",
+    "Provide the loader with the file path to the `chat.db` database file. You can optionally specify the user ID that maps to an AI message, as well as configure whether to merge message runs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "4b8b432a-d2bc-49e1-b35f-761730a8fd6d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.chat_loaders.imessage import IMessageChatLoader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8ec6661b-0aca-48ae-9e2b-6412856c287b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loader = IMessageChatLoader(\n",
+    "    path=\"./chat.db\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8805a7c5-84b4-49f5-8989-0022f2054ace",
+   "metadata": {},
+   "source": [
+    "## 3. Load messages\n",
+    "\n",
+    "The `load()` (or `lazy_load`) methods return a list of \"ChatSessions\" that currently just contain a list of messages per loaded conversation. All messages are mapped to \"HumanMessage\" objects to start. \n",
+    "\n",
+    "You can optionally choose to merge message \"runs\" (consecutive messages from the same sender) and select a sender to represent the \"AI\". The fine-tuned LLM will learn to generate these AI messages."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "fcd69b3e-020d-4a15-8a0d-61c2d34e1ee1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import List\n",
+    "from langchain.chat_loaders.base import ChatSession\n",
+    "from langchain.chat_loaders.utils import (\n",
+    "    map_ai_messages,\n",
+    "    merge_chat_runs,\n",
+    ")\n",
+    "\n",
+    "raw_messages = loader.lazy_load()\n",
+    "# Merge consecutive messages from the same sender into a single message\n",
+    "merged_messages = merge_chat_runs(raw_messages)\n",
+    "# Convert messages from \"Tortoise\" to AI messages. Do you have a guess who these conversations are between?\n",
+    "chat_sessions: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"Tortoise\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "370b8c26-c7a8-434c-a225-45c20ff14a03",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[AIMessage(content=\"Slow and steady, that's my motto.\", additional_kwargs={'message_time': 1693182723, 'sender': 'Tortoise'}, example=False),\n",
+       " HumanMessage(content='Speed is key!', additional_kwargs={'message_time': 1693182753, 'sender': 'Hare'}, example=False),\n",
+       " AIMessage(content='A balanced approach is more reliable.', additional_kwargs={'message_time': 1693182783, 'sender': 'Tortoise'}, example=False)]"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Now all of the Tortoise's messages will take the AI message class\n",
+    "# which maps to the 'assistant' role in OpenAI's training format\n",
+    "chat_sessions[0]['messages'][:3]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "05208f9d-3193-4a8d-86a5-13df2c8197e5",
+   "metadata": {},
+   "source": [
+    "## 4. Prepare for fine-tuning\n",
+    "\n",
+    "Now it's time to convert our chat messages to OpenAI dictionaries. We can use the `convert_messages_for_finetuning` utility to do so."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "8834861f-f37f-4c08-96c6-917269bf09b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.adapters.openai import convert_messages_for_finetuning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "ce7ab0f9-6e6a-4a1c-8b86-c635251d437e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Prepared 10 dialogues for training\n"
+     ]
+    }
+   ],
+   "source": [
+    "training_data = convert_messages_for_finetuning(chat_sessions)\n",
+    "print(f\"Prepared {len(training_data)} dialogues for training\")"
+   ]
+  },
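+  {
+   "cell_type": "markdown",
+   "id": "7f2a1b9c-4d3e-4f5a-8b6c-9d0e1f2a3b4c",
+   "metadata": {},
+   "source": [
+    "Each dialogue is written in the next step as one JSONL line in OpenAI's chat fine-tuning format. A sketch of a single line, reusing messages from the conversation above (your data will differ):\n",
+    "\n",
+    "```json\n",
+    "{\"messages\": [{\"role\": \"user\", \"content\": \"Speed is key!\"}, {\"role\": \"assistant\", \"content\": \"A balanced approach is more reliable.\"}]}\n",
+    "```"
+   ]
+  },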
+  {
+   "cell_type": "markdown",
+   "id": "b494d64c-8056-42ae-b4c1-a9cfabc002ea",
+   "metadata": {},
+   "source": [
+    "## 5. Fine-tune the model\n",
+    "\n",
+    "It's time to fine-tune the model. Make sure you have `openai` installed\n",
+    "and have set your `OPENAI_API_KEY` appropriately"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "b4b60daa-b899-4291-a09a-412ce9c218fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %pip install -U openai --quiet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "2cca6c95-c0d6-4826-b4fa-1c403f217f93",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "File file-zHIgf4r8LltZG3RFpkGd4Sjf ready after 10.19 seconds.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "from io import BytesIO\n",
+    "import time\n",
+    "\n",
+    "import openai\n",
+    "\n",
+    "# We will write the jsonl file in memory\n",
+    "my_file = BytesIO()\n",
+    "for m in training_data:\n",
+    "    my_file.write((json.dumps({\"messages\": m}) + \"\\n\").encode('utf-8'))\n",
+    "\n",
+    "my_file.seek(0)\n",
+    "training_file = openai.File.create(\n",
+    "    file=my_file,\n",
+    "    purpose='fine-tune'\n",
+    ")\n",
+    "\n",
+    "# OpenAI audits each training file for compliance reasons.\n",
+    "# This may take a few minutes\n",
+    "status = openai.File.retrieve(training_file.id).status\n",
+    "start_time = time.time()\n",
+    "while status != \"processed\":\n",
+    "    print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
+    "    time.sleep(5)\n",
+    "    status = openai.File.retrieve(training_file.id).status\n",
+    "print(f\"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "60ee0476-3113-4dc8-a886-bce878c60b07",
+   "metadata": {},
+   "source": [
+    "With the file ready, it's time to kick off a training job."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "c376ddca-5b4f-4e5a-bf4e-6beeb467eacc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "job = openai.FineTuningJob.create(\n",
+    "    training_file=training_file.id,\n",
+    "    model=\"gpt-3.5-turbo\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "09344c60-0bee-4989-b8d1-4a8821553cc3",
+   "metadata": {},
+   "source": [
+    "Grab a cup of tea while your model is being prepared. This may take some time!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "22eae900-04ca-456b-ba51-1dfff1f8e0e1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Status=[running]... 524.95s\r"
+     ]
+    }
+   ],
+   "source": [
+    "status = openai.FineTuningJob.retrieve(job.id).status\n",
+    "start_time = time.time()\n",
+    "while status != \"succeeded\":\n",
+    "    print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
+    "    time.sleep(5)\n",
+    "    job = openai.FineTuningJob.retrieve(job.id)\n",
+    "    status = job.status"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "39e72616-a7d9-44b8-a4eb-506611d119f4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ft:gpt-3.5-turbo-0613:personal::7sKoRdlz\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(job.fine_tuned_model)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0d717749-b1b6-451f-b3c5-3286b82d45b9",
+   "metadata": {},
+   "source": [
+    "## 6. Use in LangChain\n",
+    "\n",
+    "You can use the resulting model ID directly in the `ChatOpenAI` model class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "1579dfca-95c6-47b7-8549-1195b9dce5b0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.chat_models import ChatOpenAI\n",
+    "\n",
+    "model = ChatOpenAI(\n",
+    "    model=job.fine_tuned_model,\n",
+    "    temperature=1,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "6f53d1b1-dcbf-4976-a61a-17f74c6f1b0a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.prompts import ChatPromptTemplate\n",
+    "from langchain.schema.output_parser import StrOutputParser\n",
+    "\n",
+    "prompt = ChatPromptTemplate.from_messages(\n",
+    "    [\n",
+    "        (\"system\", \"You are speaking to hare.\"),\n",
+    "        (\"human\", \"{input}\"),\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "chain = prompt | model | StrOutputParser()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "6619c9bc-54ea-4136-bd9a-44557f7da724",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "A symbol of interconnectedness."
+     ]
+    }
+   ],
+   "source": [
+    "for tok in chain.stream({\"input\": \"What's the golden thread?\"}):\n",
+    "    print(tok, end=\"\", flush=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88e0d1a1-48a9-4d9d-9f4e-010cbbb65af8",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/libs/langchain/langchain/chat_loaders/imessage.py b/libs/langchain/langchain/chat_loaders/imessage.py
new file mode 100644
index 00000000000..10b7f778c80
--- /dev/null
+++ b/libs/langchain/langchain/chat_loaders/imessage.py
@@ -0,0 +1,117 @@
+"""IMessage Chat Loader.
+
+This class is used to load chat sessions from the iMessage chat.db SQLite file.
+It only works on macOS when you have iMessage enabled and have the chat.db file.
+
+The chat.db file is likely located at ~/Library/Messages/chat.db. However, your
+terminal may not have permission to access this file. To resolve this, you can
+copy the file to a different location, change the permissions of the file, or
+grant full disk access for your terminal emulator in System Settings > Security
+and Privacy > Full Disk Access.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, Iterator, List, Optional, Union
+
+from langchain import schema
+from langchain.chat_loaders import base as chat_loaders
+
+if TYPE_CHECKING:
+    import sqlite3
+
+
+class IMessageChatLoader(chat_loaders.BaseChatLoader):
+    def __init__(self, path: Optional[Union[str, Path]] = None):
+        """
+        Initialize the IMessageChatLoader.
+
+        Args:
+            path (str or Path, optional): Path to the chat.db SQLite file.
+                Defaults to None, in which case the default path
+                ~/Library/Messages/chat.db will be used.
+        """
+        if path is None:
+            path = Path.home() / "Library" / "Messages" / "chat.db"
+        self.db_path = path if isinstance(path, Path) else Path(path)
+        if not self.db_path.exists():
+            raise FileNotFoundError(f"File {self.db_path} not found")
+        try:
+            import sqlite3  # noqa: F401
+        except ImportError as e:
+            raise ImportError(
+                "The sqlite3 module is required to load iMessage chats.\n"
+                "Please install it with `pip install pysqlite3`"
+            ) from e
+
+    def _load_single_chat_session(
+        self, cursor: "sqlite3.Cursor", chat_id: int
+    ) -> chat_loaders.ChatSession:
+        """
+        Load a single chat session from the iMessage chat.db.
+
+        Args:
+            cursor: SQLite cursor object.
+            chat_id (int): ID of the chat session to load.
+
+        Returns:
+            ChatSession: Loaded chat session.
+        """
+        results: List[schema.HumanMessage] = []
+
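+        # chat.db keeps message text in `message` and sender handles in
+        # `handle`; `chat_message_join` links each message to its chat.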
+        query = """
+        SELECT message.date, handle.id, message.text
+        FROM message
+        JOIN chat_message_join ON message.ROWID = chat_message_join.message_id
+        JOIN handle ON message.handle_id = handle.ROWID
+        WHERE chat_message_join.chat_id = ?
+        ORDER BY message.date ASC;
+        """
+        cursor.execute(query, (chat_id,))
+        messages = cursor.fetchall()
+
+        for date, sender, text in messages:
+            if text:  # Skip empty messages
+                results.append(
+                    schema.HumanMessage(
+                        role=sender,
+                        content=text,
+                        additional_kwargs={
+                            "message_time": date,
+                            "sender": sender,
+                        },
+                    )
+                )
+
+        return chat_loaders.ChatSession(messages=results)
+
+    def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
+        """
+        Lazy load the chat sessions from the iMessage chat.db
+        and yield them in the required format.
+
+        Yields:
+            ChatSession: Loaded chat session.
+        """
+        import sqlite3
+
+        try:
+            conn = sqlite3.connect(self.db_path)
+        except sqlite3.OperationalError as e:
+            raise ValueError(
+                f"Could not open iMessage DB file {self.db_path}.\n"
+                "Make sure your terminal emulator has disk access to this file.\n"
+                "   You can either copy the DB file to an accessible location"
+                " or grant full disk access for your terminal emulator."
+                " You can grant full disk access for your terminal emulator"
+                " in System Settings > Security and Privacy > Full Disk Access."
+            ) from e
+        cursor = conn.cursor()
+
+        # Fetch the list of chat IDs
+        cursor.execute("SELECT ROWID FROM chat")
+        chat_ids = [row[0] for row in cursor.fetchall()]
+
+        for chat_id in chat_ids:
+            yield self._load_single_chat_session(cursor, chat_id)
+
+        conn.close()
From 3103f07e03182b43e8beb6aeed2c595dc5b31446 Mon Sep 17 00:00:00 2001
From: William FH <13333726+hinthornw@users.noreply.github.com>
Date: Mon, 28 Aug 2023 14:40:22 -0700
Subject: [PATCH 09/19] Use existing required args obj if specified (#9883)

We always overwrote the required args, even though pydantic already infers
them from the schema.

Rebuilding `required` from all properties makes the LLM guess a value even
when an arg is optional (e.g., for uuids).
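For example (an illustrative sketch, not code from this repo), for a schema
like

    class SearchArgs(BaseModel):
        query: str
        request_id: Optional[str] = None  # optional, e.g. a uuid

pydantic already emits `required: ["query"]`, so the sorted-properties
fallback should only kick in when the schema omits `required` entirely.
---
 libs/langchain/langchain/tools/convert_to_openai.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libs/langchain/langchain/tools/convert_to_openai.py b/libs/langchain/langchain/tools/convert_to_openai.py
index 82c0a47fbbd..e575b024d4b 100644
--- a/libs/langchain/langchain/tools/convert_to_openai.py
+++ b/libs/langchain/langchain/tools/convert_to_openai.py
@@ -19,7 +19,9 @@ def format_tool_to_openai_function(tool: BaseTool) -> FunctionDescription:
     if isinstance(tool, StructuredTool):
         schema_ = tool.args_schema.schema()
         # Bug with required missing for structured tools. 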
- required = sorted(schema_["properties"]) # BUG WORKAROUND + required = schema_.get( + "required", sorted(schema_["properties"]) # Backup is a BUG WORKAROUND + ) return { "name": tool.name, "description": tool.description, From 907c57e3244e402d28914baf145a5c6fb4213f66 Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Mon, 28 Aug 2023 15:30:41 -0700 Subject: [PATCH 10/19] Add collect_runs callback (#9885) --- .../langchain/langchain/callbacks/__init__.py | 2 ++ libs/langchain/langchain/callbacks/manager.py | 27 +++++++++++++++++++ .../callbacks/test_run_collector.py | 16 +++++++++++ 3 files changed, 45 insertions(+) create mode 100644 libs/langchain/tests/unit_tests/callbacks/test_run_collector.py diff --git a/libs/langchain/langchain/callbacks/__init__.py b/libs/langchain/langchain/callbacks/__init__.py index 8398741be34..4b9f93bf840 100644 --- a/libs/langchain/langchain/callbacks/__init__.py +++ b/libs/langchain/langchain/callbacks/__init__.py @@ -20,6 +20,7 @@ from langchain.callbacks.human import HumanApprovalCallbackHandler from langchain.callbacks.infino_callback import InfinoCallbackHandler from langchain.callbacks.labelstudio_callback import LabelStudioCallbackHandler from langchain.callbacks.manager import ( + collect_runs, get_openai_callback, tracing_enabled, tracing_v2_enabled, @@ -66,6 +67,7 @@ __all__ = [ "get_openai_callback", "tracing_enabled", "tracing_v2_enabled", + "collect_runs", "wandb_tracing_enabled", "FlyteCallbackHandler", "SageMakerCallbackHandler", diff --git a/libs/langchain/langchain/callbacks/manager.py b/libs/langchain/langchain/callbacks/manager.py index 3f22832de3e..2f7a7fad478 100644 --- a/libs/langchain/langchain/callbacks/manager.py +++ b/libs/langchain/langchain/callbacks/manager.py @@ -38,6 +38,7 @@ from langchain.callbacks.base import ( ) from langchain.callbacks.openai_info import OpenAICallbackHandler from langchain.callbacks.stdout import StdOutCallbackHandler +from langchain.callbacks.tracers import run_collector from langchain.callbacks.tracers.langchain import LangChainTracer from langchain.callbacks.tracers.langchain_v1 import LangChainTracerV1, TracerSessionV1 from langchain.callbacks.tracers.stdout import ConsoleCallbackHandler @@ -75,6 +76,11 @@ tracing_v2_callback_var: ContextVar[ ] = ContextVar( # noqa: E501 "tracing_callback_v2", default=None ) +run_collector_var: ContextVar[ + Optional[run_collector.RunCollectorCallbackHandler] +] = ContextVar( # noqa: E501 + "run_collector", default=None +) def _get_debug() -> bool: @@ -184,6 +190,24 @@ def tracing_v2_enabled( tracing_v2_callback_var.set(None) +@contextmanager +def collect_runs() -> Generator[run_collector.RunCollectorCallbackHandler, None, None]: + """Collect all run traces in context. + + Returns: + run_collector.RunCollectorCallbackHandler: The run collector callback handler. 
+ + Example: + >>> with collect_runs() as runs_cb: + chain.invoke("foo") + run_id = runs_cb.traced_runs[0].id + """ + cb = run_collector.RunCollectorCallbackHandler() + run_collector_var.set(cb) + yield cb + run_collector_var.set(None) + + @contextmanager def trace_as_chain_group( group_name: str, @@ -1712,6 +1736,7 @@ def _configure( tracer_project = os.environ.get( "LANGCHAIN_PROJECT", os.environ.get("LANGCHAIN_SESSION", "default") ) + run_collector_ = run_collector_var.get() debug = _get_debug() if ( verbose @@ -1774,4 +1799,6 @@ def _configure( for handler in callback_manager.handlers ): callback_manager.add_handler(open_ai, True) + if run_collector_ is not None: + callback_manager.add_handler(run_collector_, False) return callback_manager diff --git a/libs/langchain/tests/unit_tests/callbacks/test_run_collector.py b/libs/langchain/tests/unit_tests/callbacks/test_run_collector.py new file mode 100644 index 00000000000..9fd031b7a82 --- /dev/null +++ b/libs/langchain/tests/unit_tests/callbacks/test_run_collector.py @@ -0,0 +1,16 @@ +"""Test the run collector.""" + +import uuid + +from langchain.callbacks import collect_runs +from tests.unit_tests.llms.fake_llm import FakeLLM + + +def test_collect_runs() -> None: + llm = FakeLLM(queries={"hi": "hello"}, sequential_responses=True) + with collect_runs() as cb: + llm.predict("hi") + assert cb.traced_runs + assert len(cb.traced_runs) == 1 + assert isinstance(cb.traced_runs[0].id, uuid.UUID) + assert cb.traced_runs[0].inputs == {"prompts": ["hi"]} From fe1b9ee6b8daf16d363334cedd3cc5a5e19e4177 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Mon, 28 Aug 2023 16:01:43 -0700 Subject: [PATCH 11/19] Updated notebook for comprehend moderation (#9875) ### Description Updated the notebook for comprehend moderation. 
cc @baskaryan
---
 .../docs/guides/safety/amazon_comprehend_chain.ipynb | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/docs/docs_skeleton/docs/guides/safety/amazon_comprehend_chain.ipynb b/docs/docs_skeleton/docs/guides/safety/amazon_comprehend_chain.ipynb
index 03277c16fed..e7e1961d42d 100644
--- a/docs/docs_skeleton/docs/guides/safety/amazon_comprehend_chain.ipynb
+++ b/docs/docs_skeleton/docs/guides/safety/amazon_comprehend_chain.ipynb
@@ -30,12 +30,7 @@
    "source": [
     "import boto3\n",
     "\n",
-    "comprehend_client = boto3.client('comprehend', \n",
-    "                                 region_name='us-east-1', \n",
-    "                                 aws_access_key_id=\"ASIA6BR6ZDLNQLMEGWHM\",\n",
-    "                                 aws_secret_access_key=\"Y79nefFoOfvgrog6sojSe55xTuKqDJY53BgfrtlG\",\n",
-    "                                 aws_session_token=\"IQoJb3JpZ2luX2VjEIP//////////wEaCXVzLWVhc3QtMSJGMEQCIBvUl0Wj5Gu5GrHB+i5fHkaVc2V1381M7UNRX8EggHORAiB+dG/uKJ4loHn2oAcXIEy6+lfU7wygl4zw/vUo2VItFiqfAghMEAIaDDk2NTQyNTU2ODQ3NSIMfbh8uyoO1XONSkuEKvwBTMxeDCi//9U9LGIwZZzIiHOudQAqR2wlIGZKcw//abSeHNBE1AoDT8ibcqk7EuIt9fwnj1WYiLGmSIWd9/kSZShiKdYg0UpNWyr1/LdeutV5byFAjT21RnWTgSMr0QeSCU698PFusvO1Coph8C75pcqTVYsxi/HypJT8OfB5iCxKgfzx0qD4X6hScpIAEYZhgQXHFBAeubqMkVPYEqSob6fSm1vEI8LkU8HG1N2M2p8TzGCQWo5uBgtNkipxve++bkR+xjiNLIpAN3P1xF2/W/lYlz+4xGsi90aZqIVh/tOvAjg7Yx1Dd5Ir2C0fZc7wbtabzVFlJZ7GFcpcMOX0o6cGOp4BismuW2CJRBmFFpoparqraQaiQBY/VDbQg9KQc/Y6o0oCxkESLUdY6ino3yrheT3W832eAg0RwrmEaQqT8kKGyJFimUxrAF/otNQhySLKuSXLooguammJiQAtgK1EhmuLBUBoLcngxQ31kDqw13g7Ccwuo68fnI/QzQLj5MX+V5VLCSp9VrOzi9XSjmeF/TJQARdZeL3CSeu2pATQc80=\"\n",
-    "                                 )"
+    "comprehend_client = boto3.client('comprehend', region_name='us-east-1')"
 ]
},
{
@@ -389,7 +384,6 @@
 "comp_moderation_with_config = AmazonComprehendModerationChain(\n",
 "    moderation_config=moderation_config, # specify the configuration\n",
 "    client=comprehend_client, # optionally pass the Boto3 Client\n",
-    "    force_base_exception=True, # Force BaseModerationError\n",
 "    unique_id='john.doe@email.com', # A unique ID\n",
 "    moderation_callback=my_callback, # BaseModerationCallbackHandler\n",
 "    verbose=True\n",
From cf122b6269a841836a84940b5627c5444e069087 Mon Sep 17 00:00:00 2001
From: Leonid Ganeline
Date: Mon, 28 Aug 2023 17:42:11 -0700
Subject: [PATCH 12/19] docs: `Infino` example fix (#9888)

- Fixed a broken link in the `integrations/providers/infino.mdx`
- Fixed a title in the `integrations/callbacks/infino.ipynb` example
- Updated the text format in this example. 
--- .../integrations/callbacks/infino.ipynb | 82 ++++++++----------- docs/extras/integrations/providers/infino.mdx | 12 +-- 2 files changed, 38 insertions(+), 56 deletions(-) diff --git a/docs/extras/integrations/callbacks/infino.ipynb b/docs/extras/integrations/callbacks/infino.ipynb index 082e84c3a86..2b038932ca1 100644 --- a/docs/extras/integrations/callbacks/infino.ipynb +++ b/docs/extras/integrations/callbacks/infino.ipynb @@ -1,86 +1,73 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "8d10861f-a550-4443-bc63-4ce2ae13b841", "metadata": {}, "source": [ - "# Infino - LangChain LLM Monitoring Example\n", + "# Infino\n", "\n", - "This example shows how one can track the following while calling OpenAI models via LangChain and [Infino](https://github.com/infinohq/infino):\n", + "This example shows how one can track the following while calling OpenAI models via `LangChain` and [Infino](https://github.com/infinohq/infino):\n", "\n", "* prompt input,\n", - "* response from chatgpt or any other LangChain model,\n", + "* response from `ChatGPT` or any other `LangChain` model,\n", "* latency,\n", "* errors,\n", "* number of tokens consumed" ] }, { - "cell_type": "code", - "execution_count": 9, - "id": "3a5a0976-9953-41d8-880c-eb3f2992e936", + "cell_type": "markdown", + "id": "64d14c88-b71c-4524-ab1b-4250a7dbb62b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: matplotlib in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (3.7.1)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (1.0.7)\n", - "Requirement already satisfied: cycler>=0.10 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (4.39.4)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (1.4.4)\n", - "Requirement already satisfied: numpy>=1.20 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (1.24.3)\n", - "Requirement already satisfied: packaging>=20.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (23.1)\n", - "Requirement already satisfied: pillow>=6.2.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (9.5.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (3.0.9)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (2.8.2)\n", - "Requirement already satisfied: six>=1.5 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n", - "Requirement already satisfied: infinopy in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (0.0.1)\n", - "Requirement already satisfied: docker in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from infinopy) (6.1.3)\n", - "Requirement already satisfied: requests in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from infinopy) 
(2.31.0)\n", - "Requirement already satisfied: packaging>=14.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from docker->infinopy) (23.1)\n", - "Requirement already satisfied: urllib3>=1.26.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from docker->infinopy) (2.0.2)\n", - "Requirement already satisfied: websocket-client>=0.32.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from docker->infinopy) (1.5.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from requests->infinopy) (3.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from requests->infinopy) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from requests->infinopy) (2023.5.7)\n" - ] - } - ], + "source": [ + "## Initializing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed46c894-caa6-49b2-85d1-f275374fa308", + "metadata": {}, + "outputs": [], "source": [ "# Install necessary dependencies.\n", "!pip install infinopy\n", - "!pip install matplotlib\n", - "\n", + "!pip install matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a5a0976-9953-41d8-880c-eb3f2992e936", + "metadata": {}, + "outputs": [], + "source": [ "# Remove the (1) import sys and sys.path.append(..) and (2) uncomment `!pip install langchain` after merging the PR for Infino/LangChain integration.\n", "import sys\n", "\n", "sys.path.append(\"../../../../../langchain\")\n", - "#!pip install langchain\n", "\n", "\n", "import datetime as dt\n", - "from infinopy import InfinoClient\n", "import json\n", "from langchain.llms import OpenAI\n", - "from langchain.callbacks import InfinoCallbackHandler\n", "import matplotlib.pyplot as plt\n", "import matplotlib.dates as md\n", "import os\n", "import time\n", - "import sys" + "import sys\n", + "\n", + "from infinopy import InfinoClient\n", + "from langchain.callbacks import InfinoCallbackHandler" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "9f90210d-c805-4a0c-81e4-d5298942afc4", "metadata": {}, "source": [ - "## Start Infino server, initialize the Infino client\n" + "## Start Infino server, initialize the Infino client" ] }, { @@ -106,7 +93,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "b6b81cda-b841-43ee-8c5e-b1576555765f", "metadata": {}, @@ -148,7 +134,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "dce1b820-3f1a-4b94-b848-4c6032cadc18", "metadata": {}, @@ -214,7 +199,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "b68ec697-c922-4fd9-aad1-f49c6ac24e8a", "metadata": {}, @@ -326,7 +310,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "c3d61822-1781-4bc6-97a2-2abc5c2b2e75", "metadata": {}, @@ -364,12 +347,11 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "4b171074-c775-48e0-a4b3-f550e2c8eccb", "metadata": {}, "source": [ - "## Step 5: Stop infino server" + "## Stop infino server" ] }, { @@ -415,7 +397,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.4" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/extras/integrations/providers/infino.mdx b/docs/extras/integrations/providers/infino.mdx index dcca8af555c..2fb2cc62e4c 100644 --- 
a/docs/extras/integrations/providers/infino.mdx +++ b/docs/extras/integrations/providers/infino.mdx @@ -2,10 +2,10 @@ >[Infino](https://github.com/infinohq/infino) is an open-source observability platform that stores both metrics and application logs together. -Key features of infino include: -- Metrics Tracking: Capture time taken by LLM model to handle request, errors, number of tokens, and costing indication for the particular LLM. -- Data Tracking: Log and store prompt, request, and response data for each LangChain interaction. -- Graph Visualization: Generate basic graphs over time, depicting metrics such as request duration, error occurrences, token count, and cost. +Key features of `Infino` include: +- **Metrics Tracking**: Capture time taken by LLM model to handle request, errors, number of tokens, and costing indication for the particular LLM. +- **Data Tracking**: Log and store prompt, request, and response data for each LangChain interaction. +- **Graph Visualization**: Generate basic graphs over time, depicting metrics such as request duration, error occurrences, token count, and cost. ## Installation and Setup @@ -15,7 +15,7 @@ First, you'll need to install the `infinopy` Python package as follows: pip install infinopy ``` -If you already have an Infino Server running, then you're good to go; but if +If you already have an `Infino Server` running, then you're good to go; but if you don't, follow the next steps to start it: - Make sure you have Docker installed @@ -28,7 +28,7 @@ you don't, follow the next steps to start it: ## Using Infino -See a [usage example of `InfinoCallbackHandler`](/docs/modules/callbacks/integrations/infino.html). +See a [usage example of `InfinoCallbackHandler`](/docs/integrations/callbacks/infino.html). ```python from langchain.callbacks import InfinoCallbackHandler From f327535edae5636e72c2142ad0a5465b481679b0 Mon Sep 17 00:00:00 2001 From: maks-operlejn-ds <142261444+maks-operlejn-ds@users.noreply.github.com> Date: Tue, 29 Aug 2023 02:52:16 +0200 Subject: [PATCH 13/19] Add conftest file to langchain experimental (#9886) In order to use `requires` marker in langchain-experimental, there's a need for *conftest.py* file inside. Everything is identical to the main langchain module. Co-authored-by: maks-operlejn-ds --- .../experimental/tests/unit_tests/conftest.py | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 libs/experimental/tests/unit_tests/conftest.py diff --git a/libs/experimental/tests/unit_tests/conftest.py b/libs/experimental/tests/unit_tests/conftest.py new file mode 100644 index 00000000000..da45a330f50 --- /dev/null +++ b/libs/experimental/tests/unit_tests/conftest.py @@ -0,0 +1,83 @@ +"""Configuration for unit tests.""" +from importlib import util +from typing import Dict, Sequence + +import pytest +from pytest import Config, Function, Parser + + +def pytest_addoption(parser: Parser) -> None: + """Add custom command line options to pytest.""" + parser.addoption( + "--only-extended", + action="store_true", + help="Only run extended tests. Does not allow skipping any extended tests.", + ) + parser.addoption( + "--only-core", + action="store_true", + help="Only run core tests. Never runs any extended tests.", + ) + + +def pytest_collection_modifyitems(config: Config, items: Sequence[Function]) -> None: + """Add implementations for handling custom markers. + + At the moment, this adds support for a custom `requires` marker. 
+ + The `requires` marker is used to denote tests that require one or more packages + to be installed to run. If the package is not installed, the test is skipped. + + The `requires` marker syntax is: + + .. code-block:: python + + @pytest.mark.requires("package1", "package2") + def test_something(): + ... + """ + # Mapping from the name of a package to whether it is installed or not. + # Used to avoid repeated calls to `util.find_spec` + required_pkgs_info: Dict[str, bool] = {} + + only_extended = config.getoption("--only-extended") or False + only_core = config.getoption("--only-core") or False + + if only_extended and only_core: + raise ValueError("Cannot specify both `--only-extended` and `--only-core`.") + + for item in items: + requires_marker = item.get_closest_marker("requires") + if requires_marker is not None: + if only_core: + item.add_marker(pytest.mark.skip(reason="Skipping not a core test.")) + continue + + # Iterate through the list of required packages + required_pkgs = requires_marker.args + for pkg in required_pkgs: + # If we haven't yet checked whether the pkg is installed + # let's check it and store the result. + if pkg not in required_pkgs_info: + required_pkgs_info[pkg] = util.find_spec(pkg) is not None + + if not required_pkgs_info[pkg]: + if only_extended: + pytest.fail( + f"Package `{pkg}` is not installed but is required for " + f"extended tests. Please install the given package and " + f"try again.", + ) + + else: + # If the package is not installed, we immediately break + # and mark the test as skipped. + item.add_marker( + pytest.mark.skip(reason=f"Requires pkg: `{pkg}`") + ) + break + else: + if only_extended: + item.add_marker( + pytest.mark.skip(reason="Skipping not an extended test.") + ) From 47499c6db4886c958e5dad59b2b7506de39626e0 Mon Sep 17 00:00:00 2001 From: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> Date: Mon, 28 Aug 2023 20:53:33 -0400 Subject: [PATCH 14/19] Avoid `type: ignore` suppression by adding mypy type hint. (#9881) Mypy was not able to determine a good type for `type_to_loader_dict`, since the values in the dict are functions whose return types are related to each other in a complex way. One can see this by adding a line like `reveal_type(type_to_loader_dict)` and running mypy, which will get mypy to show what type it has inferred for that value. Adding an explicit type hint to help out mypy avoids the need for a mypy suppression and allows the code to type-check cleanly. --- libs/langchain/langchain/prompts/loading.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/libs/langchain/langchain/prompts/loading.py b/libs/langchain/langchain/prompts/loading.py index 55d2013ecd9..84f35fa8f59 100644 --- a/libs/langchain/langchain/prompts/loading.py +++ b/libs/langchain/langchain/prompts/loading.py @@ -2,7 +2,7 @@ import json import logging from pathlib import Path -from typing import Union +from typing import Callable, Dict, Union import yaml @@ -26,10 +26,7 @@ def load_prompt_from_config(config: dict) -> BasePromptTemplate: raise ValueError(f"Loading {config_type} prompt not supported") prompt_loader = type_to_loader_dict[config_type] - # Unclear why type error is being thrown here. 
- # Incompatible return value type (got "Runnable[Dict[Any, Any], PromptValue]", - # expected "BasePromptTemplate") [return-value] - return prompt_loader(config) # type: ignore[return-value] + return prompt_loader(config) def _load_template(var_name: str, config: dict) -> dict: @@ -148,8 +145,7 @@ def _load_prompt_from_file(file: Union[str, Path]) -> BasePromptTemplate: return load_prompt_from_config(config) -type_to_loader_dict = { +type_to_loader_dict: Dict[str, Callable[[dict], BasePromptTemplate]] = { "prompt": _load_prompt, "few_shot": _load_few_shot_prompt, - # "few_shot_with_templates": _load_few_shot_with_templates_prompt, } From e01b00aa54865940fafedd9059f1fcd43ae89ed2 Mon Sep 17 00:00:00 2001 From: Leonid Ganeline Date: Mon, 28 Aug 2023 18:16:22 -0700 Subject: [PATCH 15/19] docs: `ainetwork` update (#9871) * Added links to the AI Network * Made title consistent to other tool kits * Added `integrations/providers/` integration card page * **No changes** in the example code! --- .../integrations/providers/ainetwork.mdx | 23 +++++++++++++++ .../integrations/toolkits/ainetwork.ipynb | 28 +++++-------------- 2 files changed, 30 insertions(+), 21 deletions(-) create mode 100644 docs/extras/integrations/providers/ainetwork.mdx diff --git a/docs/extras/integrations/providers/ainetwork.mdx b/docs/extras/integrations/providers/ainetwork.mdx new file mode 100644 index 00000000000..16bcbab1ca0 --- /dev/null +++ b/docs/extras/integrations/providers/ainetwork.mdx @@ -0,0 +1,23 @@ +# AINetwork + +>[AI Network](https://www.ainetwork.ai/build-on-ain) is a layer 1 blockchain designed to accommodate +> large-scale AI models, utilizing a decentralized GPU network powered by the +> [$AIN token](https://www.ainetwork.ai/token), enriching AI-driven `NFTs` (`AINFTs`). + + +## Installation and Setup + +You need to install `ain-py` python package. + +```bash +pip install ain-py +``` +You need to set the `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` environmental variable to your AIN Blockchain Account Private Key. +## Toolkit + +See a [usage example](/docs/integrations/toolkits/ainetwork). + +```python +from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit +``` + diff --git a/docs/extras/integrations/toolkits/ainetwork.ipynb b/docs/extras/integrations/toolkits/ainetwork.ipynb index b3a84afd00d..8991cb7b3d7 100644 --- a/docs/extras/integrations/toolkits/ainetwork.ipynb +++ b/docs/extras/integrations/toolkits/ainetwork.ipynb @@ -1,17 +1,17 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# AINetwork Toolkit\n", + "# AINetwork\n", "\n", - "The AINetwork Toolkit is a set of tools for interacting with the AINetwork Blockchain. These tools allow you to transfer AIN, read and write values, create apps, and set permissions for specific paths within the blockchain database." + ">[AI Network](https://www.ainetwork.ai/build-on-ain) is a layer 1 blockchain designed to accommodate large-scale AI models, utilizing a decentralized GPU network powered by the [$AIN token](https://www.ainetwork.ai/token), enriching AI-driven `NFTs` (`AINFTs`).\n", + ">\n", + ">The `AINetwork Toolkit` is a set of tools for interacting with the [AINetwork Blockchain](https://www.ainetwork.ai/public/whitepaper.pdf). These tools allow you to transfer `AIN`, read and write values, create apps, and set permissions for specific paths within the blockchain database." 
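+    "\n",
+    "As a minimal sketch of the setup (the `network=\"testnet\"` argument is an assumption based on the toolkit's defaults, and the private-key value is a placeholder):\n",
+    "\n",
+    "```python\n",
+    "import os\n",
+    "\n",
+    "from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit\n",
+    "\n",
+    "# The toolkit reads your AIN Blockchain account private key from this variable.\n",
+    "os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"] = \"<your-private-key>\"\n",
+    "\n",
+    "toolkit = AINetworkToolkit(network=\"testnet\")  # assumed default; use \"mainnet\" with care\n",
+    "tools = toolkit.get_tools()  # transfer, read/write, app and permission tools\n",
+    "print([tool.name for tool in tools])\n",
+    "```"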
] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -30,7 +30,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -51,7 +50,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -96,7 +94,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -119,7 +116,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -147,7 +143,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -157,7 +152,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -174,7 +168,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -213,7 +206,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -250,7 +242,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -290,7 +281,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -337,7 +327,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -362,7 +351,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -397,7 +385,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -438,7 +425,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -453,9 +440,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" - }, - "orig_nbformat": 4 + } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From b1bffea9c7ba54060f08148e8e73f1ab7fa5f9e4 Mon Sep 17 00:00:00 2001 From: Leonid Ganeline Date: Mon, 28 Aug 2023 18:34:04 -0700 Subject: [PATCH 16/19] docs: fix for title of `llm_caching` nb (#9891) Fixed title for the `extras/integrations/llms/llm_caching.ipynb`. Existing title breaks the sorted order of items in the navbar. Updated some formatting. --- .../integrations/llms/llm_caching.ipynb | 67 +++++++++++++------ 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/docs/extras/integrations/llms/llm_caching.ipynb b/docs/extras/integrations/llms/llm_caching.ipynb index 9829cacb0ca..0d0ba397440 100644 --- a/docs/extras/integrations/llms/llm_caching.ipynb +++ b/docs/extras/integrations/llms/llm_caching.ipynb @@ -5,8 +5,9 @@ "id": "f36d938c", "metadata": {}, "source": [ - "# Caching integrations\n", - "This notebook covers how to cache results of individual LLM calls." + "# LLM Caching integrations\n", + "\n", + "This notebook covers how to cache results of individual LLM calls using different caches." 
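+    "\n",
+    "Every integration below follows the same pattern: assign a cache implementation to the global `langchain.llm_cache`, then call the LLM as usual. A minimal sketch with the in-memory cache (the model name is only an example):\n",
+    "\n",
+    "```python\n",
+    "import langchain\n",
+    "from langchain.cache import InMemoryCache\n",
+    "from langchain.llms import OpenAI\n",
+    "\n",
+    "# Cache entries are keyed on the prompt and the LLM parameters.\n",
+    "langchain.llm_cache = InMemoryCache()\n",
+    "\n",
+    "llm = OpenAI(model_name=\"text-davinci-002\")\n",
+    "llm(\"Tell me a joke\")  # first call goes to the API\n",
+    "llm(\"Tell me a joke\")  # repeated call is answered from the cache\n",
+    "```"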
] }, { @@ -26,9 +27,12 @@ { "cell_type": "markdown", "id": "b50f0598", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "## In Memory Cache" + "## `In Memory` Cache" ] }, { @@ -108,9 +112,12 @@ { "cell_type": "markdown", "id": "4bf59c12", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "## SQLite Cache" + "## `SQLite` Cache" ] }, { @@ -203,9 +210,12 @@ { "cell_type": "markdown", "id": "278ad7ae", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "## Redis Cache" + "## `Redis` Cache" ] }, { @@ -385,9 +395,12 @@ { "cell_type": "markdown", "id": "684eab55", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "## GPTCache\n", + "## `GPTCache`\n", "\n", "We can use [GPTCache](https://github.com/zilliztech/GPTCache) for exact match caching OR to cache results based on semantic similarity\n", "\n", @@ -614,9 +627,12 @@ { "cell_type": "markdown", "id": "726fe754", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "## Momento Cache\n", + "## `Momento` Cache\n", "Use [Momento](/docs/ecosystem/integrations/momento.html) to cache prompts and responses.\n", "\n", "Requires momento to use, uncomment below to install:" @@ -723,9 +739,14 @@ { "cell_type": "markdown", "id": "934943dc", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "## SQLAlchemy Cache" + "## `SQLAlchemy` Cache\n", + "\n", + "You can use `SQLAlchemyCache` to cache with any SQL database supported by `SQLAlchemy`." ] }, { @@ -735,8 +756,6 @@ "metadata": {}, "outputs": [], "source": [ - "# You can use SQLAlchemyCache to cache with any SQL database supported by SQLAlchemy.\n", - "\n", "# from langchain.cache import SQLAlchemyCache\n", "# from sqlalchemy import create_engine\n", "\n", @@ -795,7 +814,10 @@ { "cell_type": "markdown", "id": "0c69d84d", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "## Optional Caching\n", "You can also turn off caching for specific LLMs should you choose. In the example below, even though global caching is enabled, we turn it off for a specific LLM" @@ -874,7 +896,10 @@ { "cell_type": "markdown", "id": "5da41b77", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "## Optional Caching in Chains\n", "You can also turn off caching for particular nodes in chains. 
Note that because of certain interfaces, its often easier to construct the chain first, and then edit the LLM afterwards.\n", @@ -1022,9 +1047,9 @@ ], "metadata": { "kernelspec": { - "display_name": "venv", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "venv" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1036,7 +1061,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.10.12" } }, "nbformat": 4, From 5d47833ae13d4b9a5a233b5a5a93022c83f4d520 Mon Sep 17 00:00:00 2001 From: Xiaobing Mi Date: Tue, 29 Aug 2023 10:26:23 +0800 Subject: [PATCH 17/19] Fix typo in web_scraping.ipynb (#9835) --- docs/extras/use_cases/web_scraping.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/extras/use_cases/web_scraping.ipynb b/docs/extras/use_cases/web_scraping.ipynb index 2e6a8e31344..4f94b9792d7 100644 --- a/docs/extras/use_cases/web_scraping.ipynb +++ b/docs/extras/use_cases/web_scraping.ipynb @@ -143,7 +143,7 @@ "\n", "Chromium is one of the browsers supported by Playwright, a library used to control browser automation. \n", "\n", - "Headless mode means that the browser is running without a graphical user interface, which is commonly used for web scrapin." + "Headless mode means that the browser is running without a graphical user interface, which is commonly used for web scraping." ] }, { From 7fdb7439e0b0bacd9428f6f41d67ec41986a06e9 Mon Sep 17 00:00:00 2001 From: Philippe PRADOS Date: Tue, 29 Aug 2023 04:29:35 +0200 Subject: [PATCH 18/19] Update google drive notebooks (#9851) Update google drive doc loader and retriever notebooks. Show how to use with langchain-googledrive package. --------- Co-authored-by: Bagatur --- .../document_loaders/google_drive.ipynb | 353 ++++++++++++++++-- .../retrievers/google_drive.ipynb | 279 ++++++++++++++ .../integrations/toolkits/google_drive.ipynb | 215 +++++++++++ 3 files changed, 811 insertions(+), 36 deletions(-) create mode 100644 docs/extras/integrations/retrievers/google_drive.ipynb create mode 100644 docs/extras/integrations/toolkits/google_drive.ipynb diff --git a/docs/extras/integrations/document_loaders/google_drive.ipynb b/docs/extras/integrations/document_loaders/google_drive.ipynb index e7cda8f0617..9d17e5df97a 100644 --- a/docs/extras/integrations/document_loaders/google_drive.ipynb +++ b/docs/extras/integrations/document_loaders/google_drive.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "878928a6-a5ae-4f74-b351-64e3b01733fe", "metadata": { "tags": [] @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "2216c83f-68e4-4d2f-8ea2-5878fb18bbe7", "metadata": { "tags": [] @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "8f3b6aa0-b45d-4e37-8c50-5bebe70fdb9d", "metadata": { "tags": [] @@ -93,7 +93,7 @@ "source": [ "loader = GoogleDriveLoader(\n", " folder_id=\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\",\n", - " file_types=[\"document\", \"sheet\"]\n", + " file_types=[\"document\", \"sheet\"],\n", " recursive=False\n", ")" ] @@ -110,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "94207e39", "metadata": {}, "outputs": [], @@ -121,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "a15fbee0", "metadata": {}, "outputs": [], @@ -136,7 +136,7 @@ }, { "cell_type": "code", - 
"execution_count": 3, + "execution_count": null, "id": "98410bda", "metadata": {}, "outputs": [], @@ -146,21 +146,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "e3e72221", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Document(page_content='\\n \\n \\n Team\\n Location\\n Stanley Cups\\n \\n \\n Blues\\n STL\\n 1\\n \\n \\n Flyers\\n PHI\\n 2\\n \\n \\n Maple Leafs\\n TOR\\n 13\\n \\n \\n', metadata={'filetype': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'page_number': 1, 'page_name': 'Stanley Cups', 'text_as_html': '\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n
<tr><td>Team</td><td>Location</td><td>Stanley Cups</td></tr>
<tr><td>Blues</td><td>STL</td><td>1</td></tr>
<tr><td>Flyers</td><td>PHI</td><td>2</td></tr>
<tr><td>Maple Leafs</td><td>TOR</td><td>13</td></tr>
', 'category': 'Table', 'source': 'https://drive.google.com/file/d/1aA6L2AR3g0CR-PW03HEZZo4NaVlKpaP7/view'})" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "docs[0]" ] @@ -175,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "0e2d093f", "metadata": {}, "outputs": [], @@ -190,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "b35ddcc6", "metadata": {}, "outputs": [], @@ -200,21 +189,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "3cc141e0", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Document(page_content='\\n \\n \\n Team\\n Location\\n Stanley Cups\\n \\n \\n Blues\\n STL\\n 1\\n \\n \\n Flyers\\n PHI\\n 2\\n \\n \\n Maple Leafs\\n TOR\\n 13\\n \\n \\n', metadata={'filetype': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'page_number': 1, 'page_name': 'Stanley Cups', 'text_as_html': '\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n
<tr><td>Team</td><td>Location</td><td>Stanley Cups</td></tr>
<tr><td>Blues</td><td>STL</td><td>1</td></tr>
<tr><td>Flyers</td><td>PHI</td><td>2</td></tr>
<tr><td>Maple Leafs</td><td>TOR</td><td>13</td></tr>
', 'category': 'Table', 'source': 'https://drive.google.com/file/d/1aA6L2AR3g0CR-PW03HEZZo4NaVlKpaP7/view'})" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "docs[0]" ] @@ -226,6 +204,309 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "markdown", + "id": "83ac576b-48c9-4aad-a35e-e978ea32f746", + "metadata": {}, + "source": [ + "# Extended usage\n", + "An external component can manage the complexity of Google Drive : `langchain-googledrive`\n", + "It's compatible with the ̀`langchain.document_loaders.GoogleDriveLoader` and can be used\n", + "in its place.\n", + "\n", + "To be compatible with containers, the authentication uses an environment variable ̀GOOGLE_ACCOUNT_FILE` to credential file (for user or service)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b94f7119-bc1e-4ca3-907f-9d81e837ac59", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install langchain-googledrive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4c7474e-49cb-48a1-b3a0-77fba8e2dd70", + "metadata": {}, + "outputs": [], + "source": [ + "folder_id='root'\n", + "#folder_id='1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8357f7f1-e2b1-41ef-8e38-48fcc3897dba", + "metadata": {}, + "outputs": [], + "source": [ + "# Use the advanced version.\n", + "from langchain_googledrive.document_loaders import GoogleDriveLoader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16ab9d3d-1782-4cb9-ab56-d87edbb25a18", + "metadata": {}, + "outputs": [], + "source": [ + "loader = GoogleDriveLoader(\n", + " folder_id=folder_id,\n", + " recursive=False,\n", + " num_results=2, # Maximum number of file to load\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ebac43aa-dd64-4964-802a-a90172415fd1", + "metadata": {}, + "source": [ + "By default, all files with these mime-type can be converted to `Document`.\n", + "- text/text\n", + "- text/plain\n", + "- text/html\n", + "- text/csv\n", + "- text/markdown\n", + "- image/png\n", + "- image/jpeg\n", + "- application/epub+zip\n", + "- application/pdf\n", + "- application/rtf\n", + "- application/vnd.google-apps.document (GDoc)\n", + "- application/vnd.google-apps.presentation (GSlide)\n", + "- application/vnd.google-apps.spreadsheet (GSheet)\n", + "- application/vnd.google.colaboratory (Notebook colab)\n", + "- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n", + "- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n", + "\n", + "It's possible to update or customize this. See the documentation of `GDriveLoader`.\n", + "\n", + "But, the corresponding packages must be installed." 
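+    "\n",
+    "Tying this back to the container note above, a minimal sketch of the authentication setup (the credential path is a placeholder):\n",
+    "\n",
+    "```python\n",
+    "import os\n",
+    "\n",
+    "# Credentials for a user or a service account, e.g. mounted into the container.\n",
+    "os.environ[\"GOOGLE_ACCOUNT_FILE\"] = \"/secrets/credentials.json\"\n",
+    "\n",
+    "from langchain_googledrive.document_loaders import GoogleDriveLoader\n",
+    "\n",
+    "loader = GoogleDriveLoader(folder_id=\"root\", num_results=2)\n",
+    "```"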
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4560f35-a37d-44e2-be0b-adaa245b3b3d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install unstructured" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cb08da3-27df-46de-b60e-583bb7e31af4", + "metadata": {}, + "outputs": [], + "source": [ + "for doc in loader.load():\n", + " print(\"---\")\n", + " print(doc.page_content.strip()[:60]+\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "cd13d7d1-db7a-498d-ac98-76ccd9ad9019", + "metadata": {}, + "source": [ + "## Customize the search pattern\n", + "\n", + "All parameters compatible with the Google [`list()`](https://developers.google.com/drive/api/v3/reference/files/list)\n", + "API can be set.\n", + "\n", + "To specify the new pattern of the Google request, you can use a `PromptTemplate()`.\n", + "The variables for the prompt can be set with `kwargs` in the constructor.\n", + "Some pre-formatted requests are proposed (use `{query}`, `{folder_id}` and/or `{mime_type}`):\n", + "\n", + "You can customize the criteria to select the files. A set of predefined filters is proposed:\n", + "| template | description |\n", + "| -------------------------------------- | --------------------------------------------------------------------- |\n", + "| gdrive-all-in-folder | Return all compatible files from a `folder_id` |\n", + "| gdrive-query | Search `query` in all drives |\n", + "| gdrive-by-name | Search file with name `query` |\n", + "| gdrive-query-in-folder | Search `query` in `folder_id` (and sub-folders if `recursive=true`) |\n", + "| gdrive-mime-type | Search a specific `mime_type` |\n", + "| gdrive-mime-type-in-folder | Search a specific `mime_type` in `folder_id` |\n", + "| gdrive-query-with-mime-type | Search `query` with a specific `mime_type` |\n", + "| gdrive-query-with-mime-type-and-folder | Search `query` with a specific `mime_type` and in `folder_id` |\n"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81348d59-8fd6-45d4-9de3-5df5cff5c7e2", + "metadata": {}, + "outputs": [], + "source": [ + "loader = GoogleDriveLoader(\n", + " folder_id=folder_id,\n", + " recursive=False,\n", + " template=\"gdrive-query\", # Default template to use\n", + " query=\"machine learning\",\n", + " num_results=2, # Maximum number of files to load\n", + " supportsAllDrives=False, # GDrive `list()` parameter\n", + ")\n", + "for doc in loader.load():\n", + " print(\"---\")\n", + " print(doc.page_content.strip()[:60]+\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "46c6ba5b-d4b1-4f0f-9801-5c1314021605", + "metadata": {}, + "source": [ + "You can customize your pattern."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a5a323b-8d96-46b7-b46a-fd69bd2c8e04", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts.prompt import PromptTemplate\n", + "loader = GoogleDriveLoader(\n", + " folder_id=folder_id,\n", + " recursive=False,\n", + " template=PromptTemplate(\n", + " input_variables=[\"query\", \"query_name\"],\n", + " template=\"fullText contains '{query}' and name contains '{query_name}' and trashed=false\",\n", + " ), # Default template to use\n", + " query=\"machine learning\",\n", + " query_name=\"ML\", \n", + " num_results=2, # Maximum number of file to load\n", + ")\n", + "for doc in loader.load():\n", + " print(\"---\")\n", + " print(doc.page_content.strip()[:60]+\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "375bb465-8f69-407b-94bd-ffa3718ef500", + "metadata": {}, + "source": [ + "### Modes for GSlide and GSheet\n", + "The parameter mode accepts different values:\n", + "\n", + "- \"document\": return the body of each document\n", + "- \"snippets\": return the description of each file (set in metadata of Google Drive files).\n", + "\n", + "\n", + "The conversion can manage in Markdown format:\n", + "- bullet\n", + "- link\n", + "- table\n", + "- titles\n", + "\n", + "The parameter `gslide_mode` accepts different values:\n", + "\n", + "- \"single\" : one document with <PAGE BREAK>\n", + "- \"slide\" : one document by slide\n", + "- \"elements\" : one document for each elements.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7493d7b0-0600-49af-8107-7f4597c92de7", + "metadata": {}, + "outputs": [], + "source": [ + "loader = GoogleDriveLoader(\n", + " template=\"gdrive-mime-type\",\n", + " mime_type=\"application/vnd.google-apps.presentation\", # Only GSlide files\n", + " gslide_mode=\"slide\",\n", + " num_results=2, # Maximum number of file to load\n", + ")\n", + "for doc in loader.load():\n", + " print(\"---\")\n", + " print(doc.page_content.strip()[:60]+\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "9bf338fb-02d7-452f-8679-c50419b13464", + "metadata": {}, + "source": [ + "The parameter `gsheet_mode` accepts different values:\n", + "- `\"single\"`: Generate one document by line\n", + "- `\"elements\"` : one document with markdown array and <PAGE BREAK> tags." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "469f5af0-67db-4f15-8aee-88cde480729b", + "metadata": {}, + "outputs": [], + "source": [ + "loader = GoogleDriveLoader(\n", + " template=\"gdrive-mime-type\",\n", + " mime_type=\"application/vnd.google-apps.spreadsheet\", # Only GSheet files\n", + " gsheet_mode=\"elements\",\n", + " num_results=2, # Maximum number of file to load\n", + ")\n", + "for doc in loader.load():\n", + " print(\"---\")\n", + " print(doc.page_content.strip()[:60]+\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "09acb864-e919-4add-9e06-deba6f7f0cd8", + "metadata": {}, + "source": [ + "## Advanced usage\n", + "All Google File have a 'description' in the metadata. This field can be used to memorize a summary of the document or others indexed tags (See method `lazy_update_description_with_summary()`).\n", + "\n", + "If you use the `mode=\"snippet\"`, only the description will be used for the body. Else, the `metadata['summary']` has the field.\n", + "\n", + "Sometime, a specific filter can be used to extract some information from the filename, to select some files with specific criteria. 
You can use a filter.\n", + "\n", + "Sometimes, many documents are returned. It's not necessary to have all documents in memory at the same time. You can use the lazy versions of methods, to get one document at a time. It's better to use a complex query in place of a recursive search. For each folder, a query must be applied if you activate `recursive=True`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5e9c8eb-a266-4ae6-a760-d7826a0aa7c5", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "loader = GoogleDriveLoader(\n", + " gdrive_api_file=os.environ[\"GOOGLE_ACCOUNT_FILE\"],\n", + " num_results=2,\n", + " template=\"gdrive-query\",\n", + " filter=lambda search, file: \"#test\" not in file.get('description',''),\n", + " query='machine learning',\n", + " supportsAllDrives=False,\n", + " )\n", + "for doc in loader.load():\n", + " print(\"---\")\n", + " print(doc.page_content.strip()[:60]+\"...\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51efa73a-4e2d-4f9c-abaf-6c9bde2ff69d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -244,7 +525,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.1" } }, "nbformat": 4, diff --git a/docs/extras/integrations/retrievers/google_drive.ipynb b/docs/extras/integrations/retrievers/google_drive.ipynb new file mode 100644 index 00000000000..3acb14cbc1a --- /dev/null +++ b/docs/extras/integrations/retrievers/google_drive.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b0ed136e-6983-4893-ae1b-b75753af05f8", + "metadata": {}, + "source": [ + "# Google Drive Retriever\n", + "This notebook covers how to retrieve documents from Google Drive.\n", + "\n", + "## Prerequisites\n", + "\n", + "1. Create a Google Cloud project or use an existing project\n", + "1. Enable the [Google Drive API](https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com)\n", + "1. [Authorize credentials for desktop app](https://developers.google.com/drive/api/quickstart/python#authorize_credentials_for_a_desktop_application)\n", + "1. `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib`\n", + "\n", + "## Instructions for retrieving your Google Docs data\n", + "By default, the `GoogleDriveRetriever` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable using the `GOOGLE_ACCOUNT_FILE` environment variable. \n", + "The location of `token.json` use the same directory (or use the parameter `token_path`). Note that `token.json` will be created automatically the first time you use the retriever.\n", + "\n", + "`GoogleDriveRetriever` can retrieve a selection of files with some requests. \n", + "\n", + "By default, If you use a `folder_id`, all the files inside this folder can be retrieved to `Document`.\n" + ] + }, + { + "cell_type": "markdown", + "id": "35b94a93-97de-4af8-9cca-de9ffb7930c3", + "metadata": {}, + "source": [ + "You can obtain your folder and document id from the URL:\n", + "* Folder: https://drive.google.com/drive/u/0/folders/1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5 -> folder id is `\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\"`\n", + "* Document: https://docs.google.com/document/d/1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw/edit -> document id is `\"1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw\"`\n", + "\n", + "The special value `root` is for your personal home." 
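+    "\n",
+    "Putting the ids to use, a folder-scoped retriever can be sketched as follows (a preview of the cells below; `gdrive-query-in-folder` is one of the predefined templates listed later in this notebook):\n",
+    "\n",
+    "```python\n",
+    "from langchain.retrievers import GoogleDriveRetriever\n",
+    "\n",
+    "retriever = GoogleDriveRetriever(\n",
+    "    folder_id=\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\",  # or \"root\" for your personal home\n",
+    "    template=\"gdrive-query-in-folder\",  # search the body of documents in that folder\n",
+    "    num_results=2,\n",
+    ")\n",
+    "docs = retriever.get_relevant_documents(\"machine learning\")\n",
+    "```"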
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c9665c9-a023-4078-9d95-e43021cecb6f", + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "878928a6-a5ae-4f74-b351-64e3b01733fe", + "metadata": { + "ExecuteTime": { + "end_time": "2023-05-09T10:45:59.438650905Z", + "start_time": "2023-05-09T10:45:57.955900302Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.retrievers import GoogleDriveRetriever" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "755907c2-145d-4f0f-9b15-07a628a2d2d2", + "metadata": { + "ExecuteTime": { + "end_time": "2023-05-09T10:45:59.442890834Z", + "start_time": "2023-05-09T10:45:59.440941528Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "folder_id=\"root\"\n", + "#folder_id='1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2216c83f-68e4-4d2f-8ea2-5878fb18bbe7", + "metadata": { + "ExecuteTime": { + "end_time": "2023-05-09T10:45:59.795842403Z", + "start_time": "2023-05-09T10:45:59.445262457Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "retriever = GoogleDriveRetriever(\n", + " num_results=2,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "fa339ca0-f478-440c-ba80-0e5f41a19ce1", + "metadata": {}, + "source": [ + "By default, all files with these mime-type can be converted to `Document`.\n", + "- text/text\n", + "- text/plain\n", + "- text/html\n", + "- text/csv\n", + "- text/markdown\n", + "- image/png\n", + "- image/jpeg\n", + "- application/epub+zip\n", + "- application/pdf\n", + "- application/rtf\n", + "- application/vnd.google-apps.document (GDoc)\n", + "- application/vnd.google-apps.presentation (GSlide)\n", + "- application/vnd.google-apps.spreadsheet (GSheet)\n", + "- application/vnd.google.colaboratory (Notebook colab)\n", + "- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n", + "- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n", + "\n", + "It's possible to update or customize this. See the documentation of `GDriveRetriever`.\n", + "\n", + "But, the corresponding packages must be installed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dadec48", + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install unstructured" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f3b6aa0-b45d-4e37-8c50-5bebe70fdb9d", + "metadata": { + "ExecuteTime": { + "end_time": "2023-05-09T10:46:00.990310466Z", + "start_time": "2023-05-09T10:45:59.798774595Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "retriever.get_relevant_documents(\"machine learning\")" + ] + }, + { + "cell_type": "markdown", + "id": "8ff33817-8619-4897-8742-2216b9934d2a", + "metadata": {}, + "source": [ + "You can customize the criteria to select the files. 
A set of predefined filter are proposed:\n", + "| template | description |\n", + "| -------------------------------------- | --------------------------------------------------------------------- |\n", + "| gdrive-all-in-folder | Return all compatible files from a `folder_id` |\n", + "| gdrive-query | Search `query` in all drives |\n", + "| gdrive-by-name | Search file with name `query`) |\n", + "| gdrive-query-in-folder | Search `query` in `folder_id` (and sub-folders in `_recursive=true`) |\n", + "| gdrive-mime-type | Search a specific `mime_type` |\n", + "| gdrive-mime-type-in-folder | Search a specific `mime_type` in `folder_id` |\n", + "| gdrive-query-with-mime-type | Search `query` with a specific `mime_type` |\n", + "| gdrive-query-with-mime-type-and-folder | Search `query` with a specific `mime_type` and in `folder_id` |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9977c712-9659-4959-b508-f59cc7d49d44", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "retriever = GoogleDriveRetriever(\n", + " template=\"gdrive-query\", # Search everywhere\n", + " num_results=2, # But take only 2 documents\n", + ")\n", + "for doc in retriever.get_relevant_documents(\"machine learning\"):\n", + " print(\"---\")\n", + " print(doc.page_content.strip()[:60]+\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "a5a0f3ef-26fb-4a5c-85f0-5aba90b682b1", + "metadata": {}, + "source": [ + "Else, you can customize the prompt with a specialized `PromptTemplate`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0bbebde-0487-4d20-9d77-8070e4f0e0d6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain import PromptTemplate\n", + "retriever = GoogleDriveRetriever(\n", + " template=PromptTemplate(input_variables=['query'],\n", + " # See https://developers.google.com/drive/api/guides/search-files\n", + " template=\"(fullText contains '{query}') \"\n", + " \"and mimeType='application/vnd.google-apps.document' \"\n", + " \"and modifiedTime > '2000-01-01T00:00:00' \"\n", + " \"and trashed=false\"),\n", + " num_results=2,\n", + " # See https://developers.google.com/drive/api/v3/reference/files/list\n", + " includeItemsFromAllDrives=False,\n", + " supportsAllDrives=False,\n", + ")\n", + "for doc in retriever.get_relevant_documents(\"machine learning\"):\n", + " print(f\"{doc.metadata['name']}:\")\n", + " print(\"---\")\n", + " print(doc.page_content.strip()[:60]+\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "9b6fed29-1666-452e-b677-401613270388", + "metadata": {}, + "source": [ + "# Use GDrive 'description' metadata\n", + "Each Google Drive has a `description` field in metadata (see the *details of a file*).\n", + "Use the `snippets` mode to return the description of selected files.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "342dbe12-ed83-40f4-8957-0cc8c4609542", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "retriever = GoogleDriveRetriever(\n", + " template='gdrive-mime-type-in-folder',\n", + " folder_id=folder_id,\n", + " mime_type='application/vnd.google-apps.document', # Only Google Docs\n", + " num_results=2,\n", + " mode='snippets',\n", + " includeItemsFromAllDrives=False,\n", + " supportsAllDrives=False,\n", + ")\n", + "retriever.get_relevant_documents(\"machine learning\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/extras/integrations/toolkits/google_drive.ipynb b/docs/extras/integrations/toolkits/google_drive.ipynb new file mode 100644 index 00000000000..6cf1cb56d46 --- /dev/null +++ b/docs/extras/integrations/toolkits/google_drive.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Google Drive tool\n", + "\n", + "This notebook walks through connecting a LangChain to the Google Drive API.\n", + "\n", + "## Prerequisites\n", + "\n", + "1. Create a Google Cloud project or use an existing project\n", + "1. Enable the [Google Drive API](https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com)\n", + "1. [Authorize credentials for desktop app](https://developers.google.com/drive/api/quickstart/python#authorize_credentials_for_a_desktop_application)\n", + "1. `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib`\n", + "\n", + "## Instructions for retrieving your Google Docs data\n", + "By default, the `GoogleDriveTools` and `GoogleDriveWrapper` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable using the `GOOGLE_ACCOUNT_FILE` environment variable. \n", + "The location of `token.json` use the same directory (or use the parameter `token_path`). Note that `token.json` will be created automatically the first time you use the tool.\n", + "\n", + "`GoogleDriveSearchTool` can retrieve a selection of files with some requests. \n", + "\n", + "By default, If you use a `folder_id`, all the files inside this folder can be retrieved to `Document`, if the name match the query.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can obtain your folder and document id from the URL:\n", + "* Folder: https://drive.google.com/drive/u/0/folders/1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5 -> folder id is `\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\"`\n", + "* Document: https://docs.google.com/document/d/1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw/edit -> document id is `\"1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw\"`\n", + "\n", + "The special value `root` is for your personal home." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "folder_id=\"root\"\n", + "#folder_id='1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, all files with these mime-type can be converted to `Document`.\n", + "- text/text\n", + "- text/plain\n", + "- text/html\n", + "- text/csv\n", + "- text/markdown\n", + "- image/png\n", + "- image/jpeg\n", + "- application/epub+zip\n", + "- application/pdf\n", + "- application/rtf\n", + "- application/vnd.google-apps.document (GDoc)\n", + "- application/vnd.google-apps.presentation (GSlide)\n", + "- application/vnd.google-apps.spreadsheet (GSheet)\n", + "- application/vnd.google.colaboratory (Notebook colab)\n", + "- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n", + "- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n", + "\n", + "It's possible to update or customize this. See the documentation of `GoogleDriveAPIWrapper`.\n", + "\n", + "But, the corresponding packages must installed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install unstructured" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.utilities.google_drive import GoogleDriveAPIWrapper\n", + "from langchain.tools.google_drive.tool import GoogleDriveSearchTool\n", + "\n", + "# By default, search only in the filename.\n", + "tool = GoogleDriveSearchTool(\n", + " api_wrapper=GoogleDriveAPIWrapper(\n", + " folder_id=folder_id,\n", + " num_results=2,\n", + " template=\"gdrive-query-in-folder\", # Search in the body of documents\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "logging.basicConfig(level=logging.INFO)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tool.run(\"machine learning\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tool.description" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.agents import load_tools\n", + "tools = load_tools([\"google-drive-search\"],\n", + " folder_id=folder_id,\n", + " template=\"gdrive-query-in-folder\",\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use within an Agent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain import OpenAI\n", + "from langchain.agents import initialize_agent, AgentType\n", + "llm = OpenAI(temperature=0)\n", + "agent = initialize_agent(\n", + " tools=tools,\n", + " llm=llm,\n", + " agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "agent.run(\n", + " \"Search in google drive, who is 'Yann LeCun' ?\"\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From e80834d783c6306a68df54e6251d9fc307aee87c Mon Sep 17 00:00:00 2001 From: "Mazhar (Taha) Mumbaiwala" Date: Tue, 29 Aug 2023 08:00:00 +0530 Subject: [PATCH 19/19] docs: Fix spelling mistakes in Etherscan.ipynb (#9845) --- docs/extras/integrations/document_loaders/Etherscan.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/extras/integrations/document_loaders/Etherscan.ipynb b/docs/extras/integrations/document_loaders/Etherscan.ipynb index 059211f14ef..120d1db9edc 100644 --- a/docs/extras/integrations/document_loaders/Etherscan.ipynb +++ b/docs/extras/integrations/document_loaders/Etherscan.ipynb @@ -8,9 +8,9 @@ "# Etherscan Loader\n", "## Overview\n", "\n", - "The Etherscan loader use etherscan api to load transacactions histories under specific account on Ethereum Mainnet.\n", + "The Etherscan loader use etherscan api to load transaction histories under specific account on Ethereum Mainnet.\n", "\n", - "You will need a Etherscan api key to proceed. The free api key has 5 calls per seconds quota.\n", + "You will need a Etherscan api key to proceed. The free api key has 5 calls per second quota.\n", "\n", "The loader supports the following six functinalities:\n", "* Retrieve normal transactions under specific account on Ethereum Mainet\n",