Compare commits

...

244 Commits

Author SHA1 Message Date
Chester Curme
933f4ab230 bump core dep in langchain 2024-09-05 17:49:15 -04:00
ccurme
42d8b3631a core: release 0.3.0.dev2 (#26120) 2024-09-05 17:39:17 -04:00
Chester Curme
4200876531 Merge branch 'v0.3rc' into v0.3/dev_releases 2024-09-05 17:00:16 -04:00
ccurme
5bbd5364f1 core[patch]: call RunnableConfigurableFields.model_rebuild() (#26118)
To fix a test in `langchain`
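As a minimal, hypothetical sketch (not the actual `RunnableConfigurableFields` code) of why pydantic 2 models sometimes need an explicit `model_rebuild()` call:

```python
from typing import Optional

from pydantic import BaseModel


class Node(BaseModel):
    value: int
    next: Optional["Link"] = None  # forward reference, unresolved at class creation


class Link(BaseModel):
    node: Node


# Resolve the forward reference now that "Link" exists:
Node.model_rebuild()

print(Node(value=1, next=Link(node=Node(value=2))))
```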
2024-09-05 16:59:52 -04:00
Chester Curme
f3b12f8c0c vbump langchain 2024-09-05 16:36:13 -04:00
Eugene Yurtsev
e02b093d81 community[patch]: Fix more issues (#26116)
This PR resolves more type checking issues and fixes some bugs.
2024-09-05 16:31:21 -04:00
Chester Curme
522203c752 Merge branch 'v0.3/dev_releases' of github.com:langchain-ai/langchain into v0.3/dev_releases 2024-09-05 16:15:42 -04:00
Chester Curme
c492b7d33a vbump 2024-09-05 16:15:33 -04:00
Bagatur
8c4a52a9cc poetry 2024-09-05 16:01:37 -04:00
Eugene Yurtsev
0cc6584889 community[patch]: Resolve more linting issues (#26115)
Resolve a bunch of errors caught with mypy
2024-09-05 15:59:30 -04:00
Bagatur
8696f9f3a0 temporarily xfail concrete val test 2024-09-05 15:51:29 -04:00
ccurme
e1ab188e82 langchain: release 0.3.0.dev0 (#26113) 2024-09-05 15:22:29 -04:00
Eugene Yurtsev
6e1b0d0228 community[patch]: Skip unit test that depends on langchain-aws and fix pydantic settings (#26111)
* Skip unit test that depends on langchain-aws
* fix pydantic settings
2024-09-05 15:08:34 -04:00
Eugene Yurtsev
a111098230 community[patch]: Remove usage of deprecated pydantic config option (#26107)
Remove usage of deprecated pydantic config option
2024-09-05 15:05:00 -04:00
ccurme
9e7222618b core: reduce warnings (#26108) 2024-09-05 15:04:41 -04:00
Harrison Chase
8516a03a02 langchain-community[major]: Upgrade community to pydantic 2 (#26011)
This PR upgrades langchain-community to pydantic 2.


* Most of this PR was auto-generated using code mods with gritql
(https://github.com/eyurtsev/migrate-pydantic/tree/main)
* Subsequently, some code was fixed manually to accommodate
differences between pydantic 1 and 2

Breaking Changes:

- Use TEXTEMBED_API_KEY and TEXTEMBED_API_URL as env variables for the
TextEmbed integration:
cbea780492

Other changes:

- Added pydantic_settings as a required dependency for community. This
may be removed if we have enough time to convert the dependency into an
optional one.

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
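As a hedged illustration of the kind of mechanical change the gritql code mods apply across the package (class and field names here are hypothetical, not from this PR):

```python
# pydantic 1 style:
#
# class ChatFoo(BaseModel):
#     class Config:
#         allow_population_by_field_name = True
#         arbitrary_types_allowed = True

# pydantic 2 style produced by the migration:
from pydantic import BaseModel, ConfigDict


class ChatFoo(BaseModel):
    model_config = ConfigDict(
        populate_by_name=True,  # renamed from allow_population_by_field_name
        arbitrary_types_allowed=True,
    )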
2024-09-05 14:07:10 -04:00
ccurme
d0222964c1 groq, fireworks, text-splitters (#26104) 2024-09-05 13:51:41 -04:00
Chester Curme
b97307c8b4 Merge branch 'v0.3rc' into v0.3/dev_releases 2024-09-05 13:42:00 -04:00
ccurme
1ad66e70dc text-splitters[major]: update core dep + drop support for python 3.8 (#26102) 2024-09-05 13:41:28 -04:00
Bagatur
76564edd3a openai[patch]: update configurable model dumps (#26101) 2024-09-05 13:26:40 -04:00
Eugene Yurtsev
1c51e1693d core[patch]: Fix issue with adapter utility for pydantic repr (#26099)
This repr will be deleted prior to release -- it's temporarily here to
make it easy to separate code changes in langchain vs. code changes
stemming from breaking changes in pydantic
2024-09-05 12:27:01 -04:00
Bagatur
c0f886dc52 fix core version 2024-09-05 11:57:52 -04:00
Eugene Yurtsev
a267da6a3a core[minor]: Add type overload for secret_from_env factory (#26091)
Add type overload
2024-09-05 11:52:19 -04:00
Bagatur
0c63b18c1f ci 2024-09-05 11:47:56 -04:00
Bagatur
915c1e3dfb Merge branch 'v0.3rc' into v0.3/dev_releases 2024-09-05 11:41:32 -04:00
Bagatur
8da2ace99d openai[patch]: update snapshots (#26098) 2024-09-05 11:41:14 -04:00
Bagatur
81cd73cfca openai 0.2.0.dev0, anthropic 0.2.0.dev0 2024-09-05 11:07:47 -04:00
ccurme
e358846b39 core[patch]: add bedrock to load.mapping (#26094) 2024-09-05 10:56:46 -04:00
Eugene Yurtsev
3c598d25a6 core[minor]: Add get_input_jsonschema, get_output_jsonschema, get_config_jsonschema (#26034)
This PR adds methods to directly get the json schema for inputs,
outputs, and config.
Currently, it's delegating to the underlying pydantic implementation,
but this may be changed in the future to be independent of pydantic.
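A minimal usage sketch of the new methods (output shapes depend on the underlying pydantic implementation, per the note above):

```python
from langchain_core.runnables import RunnableLambda

chain = RunnableLambda(lambda x: x + 1)

# Fetch JSON schemas directly instead of going through the intermediate
# pydantic models (e.g. chain.get_input_schema().model_json_schema()):
print(chain.get_input_jsonschema())
print(chain.get_output_jsonschema())
print(chain.get_config_jsonschema())
```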
2024-09-05 10:36:42 -04:00
ccurme
e5aa0f938b mongo[major]: upgrade pydantic (#26053) 2024-09-05 09:05:41 -04:00
Bagatur
79c46319dd couchbase[patch]: rm pydantic usage (#26068) 2024-09-04 16:29:14 -07:00
ccurme
c5d4dfefc0 prompty[major]: upgrade pydantic (#26056) 2024-09-04 19:26:18 -04:00
ccurme
6e853501ec voyageai[major]: upgrade pydantic (#26070) 2024-09-04 18:59:13 -04:00
Bagatur
fd1f3ca213 exa[major]: use pydantic v2 (#26069) 2024-09-04 15:02:05 -07:00
Bagatur
567a4ce5aa box[major]: use pydantic v2 (#26067) 2024-09-04 14:51:53 -07:00
ccurme
923ce84aa7 robocorp[major]: upgrade pydantic (#26062) 2024-09-04 17:10:15 -04:00
Eugene Yurtsev
9379613132 langchain[major]: Upgrade langchain to be pydantic 2 compatible (#26050)
Upgrading the langchain package to be pydantic 2 compatible.

Had to remove some parts of unit tests in parsers that were relying on
spying on methods, since that fails with pydantic 2. The unit tests don't
seem particularly good, so they can be rewritten at a future date.

Depends on: https://github.com/langchain-ai/langchain/pull/26057

Most of this PR was done using gritql for code mods, followed by some
fixes done manually to account for changes made by pydantic
2024-09-04 16:59:07 -04:00
Bagatur
c72a76237f cherry-pick 88e9e6b (#26063) 2024-09-04 13:50:42 -07:00
Bagatur
f9cafcbcb0 pinecone[patch]: rm pydantic lint script (#26052) 2024-09-04 13:49:09 -07:00
Bagatur
1fce5543bc poetry lock 2024-09-04 13:44:51 -07:00
Bagatur
88e9e6bf55 core,standard-tests[patch]: add Ser/Des test and update serialization mapping (#26042) 2024-09-04 13:38:03 -07:00
Bagatur
7f0dd4b182 fmt 2024-09-04 13:31:29 -07:00
Bagatur
5557b86a54 fmt 2024-09-04 13:31:29 -07:00
Bagatur
caf4ae3a45 fmt 2024-09-04 13:31:28 -07:00
Bagatur
c88b75ca6a fmt 2024-09-04 13:30:02 -07:00
Bagatur
e409a85a28 fmt 2024-09-04 13:29:24 -07:00
Bagatur
40634d441a make 2024-09-04 13:29:24 -07:00
Bagatur
1d2a503ab8 standard-tests[patch]: add Ser/Des test 2024-09-04 13:29:20 -07:00
ccurme
b924c61440 qdrant[major]: drop support for python 3.8 (#26061) 2024-09-04 16:22:54 -04:00
Eugene Yurtsev
efa10c8ef8 core[minor]: Add message chunks to AnyMessage (#26057)
Adds the chunk variant of each Message to AnyMessage.

Required for this PR:
https://github.com/langchain-ai/langchain/pull/26050/files
2024-09-04 15:36:22 -04:00
ccurme
0a6c67ce6a nomic: drop support for python 3.8 (#26055) 2024-09-04 15:30:00 -04:00
ccurme
ed771f2d2b huggingface[major]: upgrade pydantic (#26048) 2024-09-04 15:08:43 -04:00
ccurme
63ba12d8e0 milvus: drop support for python 3.8 (#26051)
to be consistent with core
2024-09-04 14:54:45 -04:00
Bagatur
f785cf029b pinecone[major]: Update to pydantic v2 (#26039) 2024-09-04 11:28:54 -07:00
ccurme
be7cd0756f ollama[major]: upgrade pydantic (#26044) 2024-09-04 13:54:52 -04:00
ccurme
51c6899850 groq[major]: upgrade pydantic (#26036) 2024-09-04 13:41:40 -04:00
ccurme
163d6fe8ef anthropic: update pydantic (#26000)
Migrated with gritql: https://github.com/eyurtsev/migrate-pydantic
2024-09-04 13:35:51 -04:00
ccurme
7cee7fbfad mistralai: update pydantic (#25995)
Migrated with gritql: https://github.com/eyurtsev/migrate-pydantic
2024-09-04 13:26:17 -04:00
ccurme
4799ad95d0 core[patch]: remove warnings from protected namespaces on RunnableSerializable (#26040) 2024-09-04 13:10:08 -04:00
Bagatur
88065d794b fmt 2024-09-04 09:52:01 -07:00
Bagatur
b27bfa6717 pinecone[major]: Update to pydantic v2 2024-09-04 09:50:39 -07:00
Bagatur
5adeaf0732 openai[major]: switch to pydantic v2 (#26001) 2024-09-04 09:18:29 -07:00
Bagatur
f9d91e19c5 fireworks[major]: switch to pydantic v2 (#26004) 2024-09-04 09:18:10 -07:00
Bagatur
4c7afb0d6c Update libs/partners/openai/langchain_openai/llms/base.py 2024-09-03 23:36:19 -07:00
Bagatur
c1ff61669d Update libs/partners/openai/langchain_openai/llms/base.py 2024-09-03 23:36:14 -07:00
Bagatur
54d6808c1e Update libs/partners/openai/langchain_openai/llms/azure.py 2024-09-03 23:36:08 -07:00
Bagatur
78468de2e5 Update libs/partners/openai/langchain_openai/llms/azure.py 2024-09-03 23:36:02 -07:00
Bagatur
76572f963b Update libs/partners/openai/langchain_openai/embeddings/base.py 2024-09-03 23:35:56 -07:00
Bagatur
c0448f27ba Update libs/partners/openai/langchain_openai/embeddings/base.py 2024-09-03 23:35:51 -07:00
Bagatur
179aaa4007 Update libs/partners/openai/langchain_openai/embeddings/azure.py 2024-09-03 23:35:43 -07:00
Bagatur
d072d592a1 Update libs/partners/openai/langchain_openai/embeddings/azure.py 2024-09-03 23:35:35 -07:00
Bagatur
78c454c130 Update libs/partners/openai/langchain_openai/chat_models/base.py 2024-09-03 23:35:30 -07:00
Bagatur
5199555c0d Update libs/partners/openai/langchain_openai/chat_models/base.py 2024-09-03 23:35:26 -07:00
Bagatur
5e31cd91a7 Update libs/partners/openai/langchain_openai/chat_models/azure.py 2024-09-03 23:35:21 -07:00
Bagatur
49a1f5dd47 Update libs/partners/openai/langchain_openai/chat_models/azure.py 2024-09-03 23:35:15 -07:00
Bagatur
d0cc9b022a Update libs/partners/fireworks/langchain_fireworks/chat_models.py 2024-09-03 23:30:56 -07:00
Bagatur
a91bd2737a Update libs/partners/fireworks/langchain_fireworks/chat_models.py 2024-09-03 23:30:49 -07:00
Bagatur
5ad2b8ce80 Merge branch 'v0.3rc' into bagatur/fireworks_0.3 2024-09-03 23:29:07 -07:00
Bagatur
b78764599b Merge branch 'v0.3rc' into bagatur/openai_attempt_2 2024-09-03 23:28:50 -07:00
Bagatur
2888e34f53 infra: remove pydantic v1 tests (#26006) 2024-09-03 23:27:52 -07:00
Bagatur
dd4418a503 rm requires 2024-09-03 23:26:13 -07:00
Bagatur
a976f2071b Merge branch 'v0.3rc' into bagatur/rm_pydantic_v1_ci 2024-09-03 19:06:22 -07:00
Eugene Yurtsev
5f98975be0 core[patch]: Fix injected args in tool signature (#25991)
- Fix injected args in tool signature
- Fix another unit test that was using the wrong namespace import in
pydantic
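A minimal sketch of the injected-args pattern this fixes (the tool is hypothetical, and the `tool_call_schema` attribute is assumed from the injected-args support in core):

```python
from typing_extensions import Annotated

from langchain_core.tools import InjectedToolArg, tool


@tool
def lookup(query: str, user_id: Annotated[str, InjectedToolArg]) -> str:
    """Look up query results for a user."""
    return f"{query} for {user_id}"


# The injected arg is excluded from the schema shown to the model...
print(lookup.tool_call_schema.model_json_schema())
# ...but must still be supplied when the tool is invoked:
print(lookup.invoke({"query": "weather", "user_id": "u-123"}))
```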
2024-09-03 21:53:50 -04:00
Bagatur
0529c991ce rm 2024-09-03 18:02:12 -07:00
Bagatur
954abcce59 infra: remove pydantic v1 tests 2024-09-03 18:01:34 -07:00
Bagatur
6ad515d34e Merge branch 'v0.3rc' into bagatur/fireworks_0.3 2024-09-03 17:51:46 -07:00
Bagatur
99348e1614 Merge branch 'v0.3rc' into bagatur/openai_attempt_2 2024-09-03 17:51:27 -07:00
Bagatur
2c742cc20d standard-tests[major]: use pydantic v2 (#26005) 2024-09-03 17:50:45 -07:00
Bagatur
02f87203f7 standard-tests[major]: use pydantic v2 2024-09-03 17:48:20 -07:00
Bagatur
56163481dd fmt 2024-09-03 17:46:41 -07:00
Bagatur
6aac2eeab5 fmt 2024-09-03 17:42:22 -07:00
Bagatur
559d8a4d13 fireworks[major]: switch to pydantic v2 2024-09-03 17:41:28 -07:00
Bagatur
ec9e8eb71c fmt 2024-09-03 17:24:24 -07:00
Bagatur
9399df7777 fmt 2024-09-03 16:57:42 -07:00
Bagatur
5fc1104d00 fmt 2024-09-03 16:51:14 -07:00
Bagatur
6777106fbe fmt 2024-09-03 16:50:17 -07:00
Bagatur
5f5287c3b0 fmt 2024-09-03 16:48:53 -07:00
Bagatur
615f8b0d47 openai[major]: switch to pydantic v2 2024-09-03 16:33:35 -07:00
Bagatur
9a9ab65030 merge master correctly (#25999) 2024-09-03 14:57:29 -07:00
Bagatur
241b6d2355 Revert "merge master (#25997)" (#25998) 2024-09-03 14:55:28 -07:00
Bagatur
91e09ffee5 merge master (#25997)
Co-authored-by: Dan O'Donovan <dan.odonovan@gmail.com>
Co-authored-by: Tom Daniel Grande <tomdgrande@gmail.com>
Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no>
Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
2024-09-03 14:51:26 -07:00
Eugene Yurtsev
8e4bae351e core[major]: Drop python 3.8 support (#25996)
Drop python 3.8 support, as its EOL is October 2024
2024-09-03 14:47:27 -07:00
Eugene Yurtsev
bc3b851f08 openai[patch]: Upgrade @root_validators in preparation for pydantic 2 migration (#25491)
* Upgrade @root_validator in openai pkg
* Ran notebooks for all but AzureAI embeddings

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-09-03 14:42:24 -07:00
Erick Friis
0da201c1d5 core: fix lint 0.3rc (#25993) 2024-09-03 17:13:52 -04:00
Erick Friis
29413a22e1 infra: also run lint/test on rc (#25992) 2024-09-03 14:02:49 -07:00
Eugene Yurtsev
ae5a574aa5 core[major]: Upgrade langchain-core to pydantic 2 (#25986)
This PR upgrades core to pydantic 2.

It involves a combination of manual changes together with automated code
mods using gritql.

Changes and known issues:

1. Current models override __repr__ to be consistent with pydantic 1
(this will be removed in a follow up PR)
Related:
https://github.com/langchain-ai/langchain/pull/25986/files#diff-e5bd296179b7a72fcd4ea5cfa28b145beaf787da057e6d122aa76ee0bb8132c9R74
2. Issue with decorator for BaseChatModel
(https://github.com/langchain-ai/langchain/pull/25986/files#diff-932bf3b314b268754ef640a5b8f52da96f9024fb81dd388dcd166b5713ecdf66R202)
-- cc @baskaryan
3. `name` attribute in Base Runnable does not have a default -- was
raising a pydantic warning due to override. We need to see if there's a
way to fix to avoid making a breaking change for folks with custom
runnables.
(https://github.com/langchain-ai/langchain/pull/25986/files#diff-836773d27f8565f4dd45e9d6cf828920f89991a880c098b7511e0d3bb78a8a0dR238)
4. Likely can remove hard-coded RunnableBranch name
(https://github.com/langchain-ai/langchain/pull/25986/files#diff-72894b94f70b1bfc908eb4d53f5ff90bb33bf8a4240a5e34cae48ddc62ac313aR147)
5. `model_*` namespace is reserved in pydantic. We'll need to specify
`protected_namespaces`
6. create_model does not have a cached path yet
7. get_input_schema() in many places has been updated to be explicit
about whether parameters are required or optional
8. injected tool args aren't picked up properly (losing type annotation)

For posterity the following gritql migrations were used:

```
engine marzano(0.1)
language python

or {
    `from $IMPORT import $...` where {
        $IMPORT <: contains `pydantic_v1`,
        $IMPORT => `pydantic`
    },
    `$X.update_forward_refs` => `$X.model_rebuild`,
  // This pattern still needs fixing as it fails (populate_by_name vs.
  // allow_populate_by_name)
  class_definition($name, $body) as $C where {
      $name <: `Config`,
      $body <: block($statements),
      $t = "",
      $statements <: some bubble($t) assignment(left=$x, right=$y) as $A where {    
        or {
            $x <: `allow_population_by_field_name` where {
                $t += `populate_by_name=$y,`
            },
            $t += `$x=$y,`
        }
      },
      $C => `model_config = ConfigDict($t)`,
      add_import(source="pydantic", name="ConfigDict")
  }
}

```



```
engine marzano(0.1)
language python

`@root_validator(pre=True)` as $decorator where {
    $decorator <: before function_definition($body, $return_type),
    $decorator => `@model_validator(mode="before")\n@classmethod`,
    add_import(source="pydantic", name="model_validator"),
    $return_type => `Any`
}
```

```
engine marzano(0.1)
language python

`@root_validator(pre=False, skip_on_failure=True)` as $decorator where {
    $decorator <: before function_definition($body, $parameters, $return_type) where {
        $body <: contains bubble or {
            `values["$Q"]` => `self.$Q`,
            `values.get("$Q")` => `(self.$Q or None)`,
            `values.get($Q, $...)` as $V where {
                $Q <: contains `"$QName"`,
                $V => `self.$QName`,
            },
            `return $Q` => `return self`
        }
    },
    $decorator => `@model_validator(mode="after")`,
    // Silly work around a bug in grit
    // Adding Self to pydantic and then will replace it with one from typing
    add_import(source="pydantic", name="model_validator"),
    $parameters => `self`,
    $return_type => `Self`
}

```

```
grit apply --language python '`Self` where { add_import(source="typing_extensions", name="Self")}'
```
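For reference, a hedged before/after sketch of what the `@root_validator(pre=True)` migration above produces on a hypothetical model (not code from this PR):

```python
from typing import Any

from pydantic import BaseModel, model_validator


class Client(BaseModel):
    timeout: int

    # pydantic 1 form:
    #   @root_validator(pre=True)
    #   def set_defaults(cls, values): ...
    # pydantic 2 form, as rewritten by the first migration:
    @model_validator(mode="before")
    @classmethod
    def set_defaults(cls, values: dict) -> Any:
        values.setdefault("timeout", 60)
        return values


print(Client())  # timeout=60
```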
2024-09-03 16:30:44 -04:00
Tom Daniel Grande
0207dc1431 community: delta in openai choice can be None, creates handler for that (#25954)
**Description:** adds a handler for when delta choice is None

**Issue:** Fixes #25951
**Dependencies:** Not applicable

- [x] **Add tests and docs**: Not applicable
- [x] **Lint and test**

Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-09-03 20:30:03 +00:00
Erick Friis
5a0e82c31c infra: fix 0.3rc ci check (#25988) 2024-09-03 12:20:08 -07:00
Bagatur
9eb9ff52c0 experimental[patch]: Release 0.0.65 (#25987) 2024-09-03 19:15:48 +00:00
Bagatur
bc3b02651c standard-tests[patch]: test init from env vars (#25983) 2024-09-03 19:05:39 +00:00
Bagatur
ac922105ad infra: rm ai21 from CI (#25984) 2024-09-03 11:47:27 -07:00
Bagatur
0af447c90b community[patch]: Release 0.2.16 (#25982) 2024-09-03 18:34:18 +00:00
Dan O'Donovan
f49da71e87 community[patch]: change default Neo4j username/password (#25226)
**Description:**

Change the default Neo4j username/password (when not supplied as
environment variable or in code) from `None` to `""`.

Neo4j has an option to [disable
auth](https://neo4j.com/docs/operations-manual/current/configuration/configuration-settings/#config_dbms.security.auth_enabled)
which is helpful when developing. When auth is disabled, the username /
password through the `neo4j` module should be `""` (ie an empty string).

Empty strings get marked as false in
`langchain_core.utils.env.get_from_dict_or_env` -- changing this code /
behaviour would have a wide impact and is undesirable.

In order to both _allow_ access to Neo4j with auth disabled and _not_
impact `langchain_core` this patch is presented. The downside would be
that if a user forgets to set NEO4J_USERNAME or NEO4J_PASSWORD they
would see an invalid credentials error rather than missing credentials
error. This could be mitigated but would result in a less elegant patch!

**Issue:**
Fix issue where langchain cannot communicate with Neo4j if Neo4j auth is
disabled.
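A small sketch of the falsy-empty-string behaviour described above (illustrative only):

```python
from langchain_core.utils.env import get_from_dict_or_env

# "" is falsy, so the lookup falls through to the env variable / default
# instead of returning the empty string the user intended:
data = {"username": ""}
print(get_from_dict_or_env(data, "username", "NEO4J_USERNAME", default="neo4j"))
# -> "neo4j", not ""
```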
2024-09-03 11:24:18 -07:00
Bagatur
035d8cf51b milvus[patch]: Release 0.1.5 (#25981) 2024-09-03 18:19:51 +00:00
Bagatur
1dfc8c01af langchain[patch]: Release 0.2.16 (#25977) 2024-09-03 18:10:21 +00:00
Bagatur
fb642e1e27 text-splitters[patch]: Release 0.2.4 (#25979) 2024-09-03 18:09:43 +00:00
Erick Friis
8590b421c4 infra: ignore core dependents for 0.3rc (#25980) 2024-09-03 11:06:45 -07:00
Bagatur
7457949619 mistralai[patch]: Release 0.1.13 (#25978) 2024-09-03 18:03:15 +00:00
Bagatur
0c69c9fb3f core[patch]: Release 0.2.38 (#25974) 2024-09-03 17:31:41 +00:00
Eugene Yurtsev
fa8402ea09 core[minor]: Add support for multiple env keys for secrets_from_env (#25971)
- Add support to look up secret using more than one env variable
- Add overload to help mypy

Needed for https://github.com/langchain-ai/langchain/pull/25491
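A hedged usage sketch (the env variable names and default are hypothetical):

```python
from pydantic import BaseModel, Field, SecretStr

from langchain_core.utils import secret_from_env


class Config(BaseModel):
    # Looked up from the first env variable in the sequence that is set:
    api_key: SecretStr = Field(
        default_factory=secret_from_env(["FOO_API_KEY", "FOO_TOKEN"], default="sk-test")
    )


print(Config().api_key)  # SecretStr('**********')
```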
2024-09-03 11:39:54 -04:00
Maximilian Schulz
fdeaff4149 langchain-mistralai - make base URL possible to set via env variable for ChatMistralAI (#25956)
**Description:** 

Similar to other packages (`langchain_openai`, `langchain_anthropic`) it
would be beneficial if that `ChatMistralAI` model could fetch the API
base URL from the environment.

This PR allows this via the following order:
- provided value
- then whatever `MISTRAL_API_URL` is set to
- then whatever `MISTRAL_BASE_URL` is set to
- if `None`, then the default is `"https://api.mistral.ai/v1"`
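A hedged sketch of the resulting behaviour (the `endpoint` field name on `ChatMistralAI` is assumed, and the URL is hypothetical):

```python
import os

from langchain_mistralai import ChatMistralAI

os.environ["MISTRAL_API_URL"] = "https://my-proxy.internal/v1"

# No endpoint argument provided, so the base URL is resolved from the env:
llm = ChatMistralAI(model="mistral-small-latest", api_key="dummy")
# llm.endpoint now reflects MISTRAL_API_URL
```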


- [x] **Add tests and docs**:

Added unit tests; docs feel unnecessary, as this just aligns with other
packages that do the same.


- [x] **Lint and test**

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-09-03 14:32:35 +00:00
Jorge Piedrahita Ortiz
c7154a4045 community: sambastudio llms api v2 support (#25063)
- **Description:** SambaStudio GenericV2 API support
2024-09-03 10:18:15 -04:00
ZhangShenao
8d784db107 docs: Add missing args in api doc of WebResearchRetriever (#25949)
Add missing args in api doc of `WebResearchRetriever`
2024-09-03 01:24:23 -07:00
Bagatur
da113f6363 docs: ChatOpenAI.with_structured_output nits (#25952) 2024-09-03 08:20:58 +00:00
Bagatur
5b99bb2437 docs: fix bullet list spacing (#25950)
Fix #25935
2024-09-03 08:12:58 +00:00
Yuki Watanabe
ef329f6819 docs: Fix databricks doc (#25941)
https://github.com/langchain-ai/langchain/pull/25929 broke the layout
because of missing `:::` for the caution clause.

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>
2024-09-02 18:17:47 -07:00
Bagatur
f872c50b3f docs: installation nits (#24484) 2024-09-03 01:05:08 +00:00
Isaac Francisco
4833375200 community[patch]: added option to change how duckduckgosearchresults tool converts api outputs into string (#22580)
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-09-02 22:42:19 +00:00
JonZeolla
78ff51ce83 community[patch]: update the default hf bge embeddings (#22627)
**Description:** This updates the langchain_community > huggingface >
default bge embeddings ([the current default recommends this
change](https://huggingface.co/BAAI/bge-large-en))
**Issue:** None
**Dependencies:** None
**Twitter handle:** @jonzeolla

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-09-02 22:10:21 +00:00
Leonid Ganeline
150251fd49 docs: integrations reference updates 13 (#25711)
Added missing provider pages and links. Fixed inconsistent formatting.
Added arXiv references to docstrings.

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
2024-09-02 22:08:50 +00:00
Yuki Watanabe
64dfdaa924 docs: Add Databricks integration (#25929)
Updating the gateway pages in the documentation to name the
`langchain-databricks` integration.

---------

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-09-02 22:05:40 +00:00
Bagatur
933bc0d6ff core[patch]: support additional kwargs on StructuredPrompt (#25645) 2024-09-02 14:55:26 -07:00
Yash Parmar
51dae57357 community[minor]: jina search tools integrating (jina reader) (#23339)
- **PR title**: "community: add Jina Search tool"
- **Description:** Added the Jina Search tool for querying the Jina
search API. This includes the implementation of the JinaSearchAPIWrapper
and the JinaSearch tool, along with a Jupyter notebook example
demonstrating its usage.
- **Issue:** N/A
- **Dependencies:** N/A
- **Twitter handle:** [Twitter
handle](https://x.com/yashp3020?t=7wM0gQ7XjGciFoh9xaBtqA&s=09)


- [x] **Add tests and docs**: an example notebook showing its use lives in
the `docs/docs/integrations` directory.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-09-02 14:52:14 -07:00
Matthew DeGenaro
66828f4ecc text-splitters[patch]: Modified SpacyTextSplitter to fully keep whitespace when strip_whitespace is false (#23272)
Previously, regardless of whether strip_whitespace was set to true or
false, the sentence-splitting code in the SpacyTextSplitter class used
`sent.text` to get the sentence. This change adds a ternary so that when
strip_whitespace is false, `sent.text_with_ws` is used instead (see the
sketch below). Also modified pyproject.toml to include the spacy pipeline
package and to lock the numpy version, as higher versions break spacy.

- **Issue:** N/a
- **Dependencies:** None
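A tiny stand-in example of the ternary (hypothetical mini-types; the real code operates on spaCy's `doc.sents`):

```python
from dataclasses import dataclass


# Stand-in for a spaCy Span, to illustrate the ternary added by this change:
@dataclass
class Sent:
    text: str          # sentence with surrounding whitespace stripped
    text_with_ws: str  # sentence including trailing whitespace


def sentence_text(sent: Sent, strip_whitespace: bool) -> str:
    return sent.text if strip_whitespace else sent.text_with_ws


s = Sent(text="Hello world.", text_with_ws="Hello world.  ")
assert sentence_text(s, strip_whitespace=True) == "Hello world."
assert sentence_text(s, strip_whitespace=False) == "Hello world.  "
```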
2024-09-02 21:15:56 +00:00
Qingchuan Hao
3145995ed9 community[patch]: BingSearchResults returns raw snippets as artifact (#23304)
Returns an array of results, which is more specific and easier to use
later.

Tested locally:
```
resp = tool.invoke("what's the weather like in Shanghai?")
for item in resp:
    print(item)
```
returns
```
{'snippet': '<b>Shanghai</b>, <b>Shanghai</b>, China <b>Weather</b> Forecast, with current conditions, wind, air quality, and what to expect for the next 3 days.', 'title': 'Shanghai, Shanghai, China Weather Forecast | AccuWeather', 'link': 'https://www.accuweather.com/en/cn/shanghai/106577/weather-forecast/106577'}
{'snippet': '5. 99 / 87 °F. 6. 99 / 86 °F. 7. Detailed forecast for 14 days. Need some help? Current <b>weather</b> <b>in Shanghai</b> and forecast for today, tomorrow, and next 14 days.', 'title': 'Weather for Shanghai, Shanghai Municipality, China - timeanddate.com', 'link': 'https://www.timeanddate.com/weather/china/shanghai'}
{'snippet': '<b>Shanghai</b> - <b>Weather</b> warnings issued 14-day forecast. <b>Weather</b> warnings issued. Forecast - <b>Shanghai</b>. Day by day forecast. Last updated Friday at 01:05. Tonight, ... Temperature feels <b>like</b> 34 ...', 'title': 'Shanghai - BBC Weather', 'link': 'https://www.bbc.com/weather/1796236'}
{'snippet': 'Current <b>weather</b> <b>in Shanghai</b>, <b>Shanghai</b>, China. Check current conditions <b>in Shanghai</b>, <b>Shanghai</b>, China with radar, hourly, and more.', 'title': 'Shanghai, Shanghai, China Current Weather | AccuWeather', 'link': 'https://www.accuweather.com/en/cn/shanghai/106577/current-weather/106577'}
13-Day Beijing, Xi&#39;an, Chengdu, <b>Shanghai</b> Chinese Language and Culture Immersion Tour. <b>Shanghai</b> in September. Average daily temperature range: 23–29°C (73–84°F) Average rainy days: 10. Average sunny days: 20. September ushers in pleasant autumn <b>weather</b>, making it one of the best months to visit <b>Shanghai</b>. <b>Weather</b> in <b>Shanghai</b>: Climate, Seasons, and Average Monthly Temperature. <b>Shanghai</b> has a subtropical maritime monsoon climate, meaning high humidity and lots of rain. Hot muggy summers, cool falls, cold winters with little snow, and warm springs are the norm. Midsummer through early fall is the best time to visit <b>Shanghai</b>. <b>Shanghai</b>, <b>Shanghai</b>, China <b>Weather</b> Forecast, with current conditions, wind, air quality, and what to expect for the next 3 days. 1165. 45.9. 121. Winter, from December to February, is quite cold: the average January temperature is 5 °C (41 °F). There may be cold periods, with highs around 5 °C (41 °F) or below, and occasionally, even snow can fall. The temperature dropped to -10 °C (14 °F) in January 1977 and to -7 °C (19.5 °F) in January 2016. 5. 99 / 87 °F. 6. 99 / 86 °F. 7. Detailed forecast for 14 days. Need some help? Current <b>weather</b> in <b>Shanghai</b> and forecast for today, tomorrow, and next 14 days. Everything you need to know about today&#39;s <b>weather</b> in <b>Shanghai</b>, <b>Shanghai</b>, China. High/Low, Precipitation Chances, Sunrise/Sunset, and today&#39;s Temperature History. <b>Shanghai</b> - <b>Weather</b> warnings issued 14-day forecast. <b>Weather</b> warnings issued. Forecast - <b>Shanghai</b>. Day by day forecast. Last updated Friday at 01:05. Tonight, ... Temperature feels <b>like</b> 34 ... <b>Shanghai</b> 14 Day Extended Forecast. <b>Weather</b> Today <b>Weather</b> Hourly 14 Day Forecast Yesterday/Past <b>Weather</b> Climate (Averages) Currently: 84 °F. Passing clouds. (<b>Weather</b> station: <b>Shanghai</b> Hongqiao Airport, China). See more current <b>weather</b>. Current <b>weather</b> in <b>Shanghai</b>, <b>Shanghai</b>, China. Check current conditions in <b>Shanghai</b>, <b>Shanghai</b>, China with radar, hourly, and more. <b>Shanghai</b> <b>Weather</b> Forecasts. <b>Weather Underground</b> provides local &amp; long-range <b>weather</b> forecasts, weatherreports, maps &amp; tropical <b>weather</b> conditions for the <b>Shanghai</b> area.
```

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-09-02 21:11:32 +00:00
venkatram-dev
a09e2afee4 typo_summarization_tutorial (#25938)
docs: fix a couple of typos in the summarization tutorial
2024-09-02 13:44:11 -07:00
Alexander KIRILOV
6a8f8a56ac community[patch]: added content_columns option to CSVLoader (#23809)
**Description:** 
Adds a new option to the CSVLoader that allows explicitly specifying the
columns used for generating the Document content. Currently these are
implicitly set as "all fields not part of the metadata_columns".

In some cases however it is useful to have a field both as a metadata
and as part of the document content.
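A hedged usage sketch of the new option (the file name and column names are hypothetical):

```python
from langchain_community.document_loaders import CSVLoader

loader = CSVLoader(
    file_path="products.csv",
    metadata_columns=["sku"],
    # Only these columns go into page_content; "sku" can still appear in
    # metadata at the same time:
    content_columns=["name", "description"],
)
docs = loader.load()
```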
2024-09-02 20:25:53 +00:00
Bruno Alvisio
ab527027ac community: Resolve refs recursively when generating openai_fn from OpenAPI spec (#19002)
- **Description:** This PR is intended to improve the generation of
payloads for OpenAI functions when converting from an OpenAPI spec file.
The solution is to recursively resolve `$refs`.
Currently when converting OpenAPI specs into OpenAI functions using
`openapi_spec_to_openai_fn`, if the schemas have nested references, the
generated functions contain `$ref` that causes the LLM to generate
payloads with an incorrect schema.

For example, for the following OpenAPI spec:

```
text = """
{
  "openapi": "3.0.3",
  "info": {
    "title": "Swagger Petstore - OpenAPI 3.0",
    "termsOfService": "http://swagger.io/terms/",
    "contact": {
      "email": "apiteam@swagger.io"
    },
    "license": {
      "name": "Apache 2.0",
      "url": "http://www.apache.org/licenses/LICENSE-2.0.html"
    },
    "version": "1.0.11"
  },
  "externalDocs": {
    "description": "Find out more about Swagger",
    "url": "http://swagger.io"
  },
  "servers": [
    {
      "url": "https://petstore3.swagger.io/api/v3"
    }
  ],
  "tags": [
    {
      "name": "pet",
      "description": "Everything about your Pets",
      "externalDocs": {
        "description": "Find out more",
        "url": "http://swagger.io"
      }
    },
    {
      "name": "store",
      "description": "Access to Petstore orders",
      "externalDocs": {
        "description": "Find out more about our store",
        "url": "http://swagger.io"
      }
    },
    {
      "name": "user",
      "description": "Operations about user"
    }
  ],
  "paths": {
    "/pet": {
      "post": {
        "tags": [
          "pet"
        ],
        "summary": "Add a new pet to the store",
        "description": "Add a new pet to the store",
        "operationId": "addPet",
        "requestBody": {
          "description": "Create a new pet in the store",
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/Pet"
              }
            }
          },
          "required": true
        },
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/Pet"
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "Tag": {
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "format": "int64"
          },
          "model_type": {
            "type": "number"
          }
        }
      },
      "Category": {
        "type": "object",
        "required": [
          "model",
          "year",
          "age"
        ],
        "properties": {
          "year": {
            "type": "integer",
            "format": "int64",
            "example": 1
          },
          "model": {
            "type": "string",
            "example": "Ford"
          },
          "age": {
            "type": "integer",
            "example": 42
          }
        }
      },
      "Pet": {
        "required": [
          "name"
        ],
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "format": "int64",
            "example": 10
          },
          "name": {
            "type": "string",
            "example": "doggie"
          },
          "category": {
            "$ref": "#/components/schemas/Category"
          },
          "tags": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Tag"
            }
          },
          "status": {
            "type": "string",
            "description": "pet status in the store",
            "enum": [
              "available",
              "pending",
              "sold"
            ]
          }
        }
      }
    }
  }
}
"""
```

Executing:
```
spec = OpenAPISpec.from_text(text)
pet_openai_functions, pet_callables = openapi_spec_to_openai_fn(spec)
response = model.invoke("Create a pet named Scott", functions=pet_openai_functions)
```

`pet_openai_functions` contains unresolved `$ref`s:

```
[
  {
    "name": "addPet",
    "description": "Add a new pet to the store",
    "parameters": {
      "type": "object",
      "properties": {
        "json": {
          "properties": {
            "id": {
              "type": "integer",
              "schema_format": "int64",
              "example": 10
            },
            "name": {
              "type": "string",
              "example": "doggie"
            },
            "category": {
              "ref": "#/components/schemas/Category"
            },
            "tags": {
              "items": {
                "ref": "#/components/schemas/Tag"
              },
              "type": "array"
            },
            "status": {
              "type": "string",
              "enum": [
                "available",
                "pending",
                "sold"
              ],
              "description": "pet status in the store"
            }
          },
          "type": "object",
          "required": [
            "name",
            "photoUrls"
          ]
        }
      }
    }
  }
]
```

and the generated JSON has an incorrect schema (e.g. category is filled
with `id` and `name` instead of `model`, `year` and `age`):

```
{
  "id": 1,
  "name": "Scott",
  "category": {
    "id": 1,
    "name": "Dogs"
  },
  "tags": [
    {
      "id": 1,
      "name": "tag1"
    }
  ],
  "status": "available"
}
```

With this change, `pet_openai_functions` becomes:

```
[
  {
    "name": "addPet",
    "description": "Add a new pet to the store",
    "parameters": {
      "type": "object",
      "properties": {
        "json": {
          "properties": {
            "id": {
              "type": "integer",
              "schema_format": "int64",
              "example": 10
            },
            "name": {
              "type": "string",
              "example": "doggie"
            },
            "category": {
              "properties": {
                "year": {
                  "type": "integer",
                  "schema_format": "int64",
                  "example": 1
                },
                "model": {
                  "type": "string",
                  "example": "Ford"
                },
                "age": {
                  "type": "integer",
                  "example": 42
                }
              },
              "type": "object",
              "required": [
                "model",
                "year",
                "age"
              ]
            },
            "tags": {
              "items": {
                "properties": {
                  "id": {
                    "type": "integer",
                    "schema_format": "int64"
                  },
                  "model_type": {
                    "type": "number"
                  }
                },
                "type": "object"
              },
              "type": "array"
            },
            "status": {
              "type": "string",
              "enum": [
                "available",
                "pending",
                "sold"
              ],
              "description": "pet status in the store"
            }
          },
          "type": "object",
          "required": [
            "name"
          ]
        }
      }
    }
  }
]
```

and the JSON generated by the LLM is:
```
{
  "id": 1,
  "name": "Scott",
  "category": {
    "year": 2022,
    "model": "Dog",
    "age": 42
  },
  "tags": [
    {
      "id": 1,
      "model_type": 1
    }
  ],
  "status": "available"
}
```

which has the intended schema.

    - **Twitter handle:** @brunoalvisio

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2024-09-02 13:17:39 -07:00
Nuno Campos
464dae8ac2 core: Include global variables in variables found by get_function_nonlocals (#25936)
2024-09-02 11:49:25 -07:00
Luiz F. G. dos Santos
36bbdc776e community: fix bug to support for file_search tool from OpenAI (#25927)
- **Description:** The function `_is_assistants_builtin_tool` didn't have
support for `file_search` from OpenAI, which caused a conflict and blocked
its usage. The OpenAI Assistants API changed from `retrieval` to
`file_search`.
  
  The following code
  
  ```
              agent = OpenAIAssistantV2Runnable.create_assistant(
                name="Data Analysis Assistant",
                instructions=prompt[0].content,
                tools={'type': 'file_search'},
                model=self.chat_config.connection.deployment_name,
                client=llm,
                as_agent=True,
                tool_resources={
                    "file_search": {
                        "vector_store_ids": vector_store_id
                        }
                    }
                )
```

was throwing the following error:

```
Traceback (most recent call last):
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/chat_decorators.py",
line 500, in get_response
    return await super().get_response(post, context)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/chat_decorators.py",
line 96, in get_response
    response = await self.inner_chat.get_response(post, context)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/chat_decorators.py",
line 96, in get_response
    response = await self.inner_chat.get_response(post, context)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/chat_decorators.py",
line 96, in get_response
    response = await self.inner_chat.get_response(post, context)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  [Previous line repeated 4 more times]
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/azure_open_ai_chat.py",
line 147, in get_response
chain = chain_factory.get_chain(prompts, post.conversation.id,
overrides, context)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/llm_connections/chains.py",
line 1324, in get_chain
    agent = OpenAIAssistantV2Runnable.create_assistant(
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_community/agents/openai_assistant/base.py",
line 256, in create_assistant
tools=[_get_assistants_tool(tool) for tool in tools], # type: ignore
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_community/agents/openai_assistant/base.py",
line 256, in <listcomp>
tools=[_get_assistants_tool(tool) for tool in tools], # type: ignore
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_community/agents/openai_assistant/base.py",
line 119, in _get_assistants_tool
    return convert_to_openai_tool(tool)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_core/utils/function_calling.py",
line 255, in convert_to_openai_tool
    function = convert_to_openai_function(tool)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_core/utils/function_calling.py",
line 230, in convert_to_openai_function
    raise ValueError(
ValueError: Unsupported function

{'type': 'file_search'}

Functions must be passed in as Dict, pydantic.BaseModel, or Callable. If
they're a dict they must either be in OpenAI function format or valid
JSON schema with top-level 'title' and 'description' keys.
```

With the proposed changes, this is fixed and the function will have support for `file_search`.
  This was the only place missing the support for `file_search`.
  
  Reference doc
  https://platform.openai.com/docs/assistants/tools/file-search
  
  
  - **Twitter handle:** luizf0992

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2024-09-02 18:21:51 +00:00
Jacob Lee
f49cce739b 👥 Update LangChain people data (#25917)
👥 Update LangChain people data

Co-authored-by: github-actions <github-actions@github.com>
2024-09-02 11:14:35 -07:00
Leonid Ganeline
96b99a5022 docs: integrations google missed references (#25923)
Added missing integration links. Fixed inconsistent formatting.
2024-09-02 11:14:18 -07:00
Leonid Ganeline
086556d466 docs: integrations reference updates 14 (#25928)
Added missing provider pages and links. Fixed inconsistent formatting.
2024-09-02 11:07:45 -07:00
Tyler Wray
1ff8c36aa6 docs: fix pgvector link (#25930)
- **Description:** pg_vector link is 404'ing. This fixes it.
2024-09-02 18:03:19 +00:00
xander-art
6cd452d985 Feature/update hunyuan (#25779)
Description: 
    - Add system templates and user templates in integration testing
    - Initialize the response id field value to request_id
    - Adjust the default model to hunyuan-pro
    - Remove the default values of Temperature and TopP
    - Add SystemMessage

All the integration tests have passed.
1. Executing the integration tests for the first time:
<img width="1359" alt="71ca77a2-e9be-4af6-acdc-4d665002bd9b"
src="https://github.com/user-attachments/assets/9298dc3a-aa26-4bfa-968b-c011a4e699c9">

2. Running the integration tests a second time:
<img width="1501" alt="image"
src="https://github.com/user-attachments/assets/61335416-4a67-4840-bb89-090ba668e237">

Issue: None
Dependencies: None
Twitter handle: None

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-09-02 12:55:08 +00:00
Yuwen Hu
566e9ba164 community: add Intel GPU support to ipex-llm llm integration (#22458)
**Description:** [IPEX-LLM](https://github.com/intel-analytics/ipex-llm)
is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local
PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low
latency. This PR adds Intel GPU support to `ipex-llm` llm integration.
**Dependencies:** `ipex-llm`
**Contribution maintainer**: @ivy-lv11 @Oscilloscope98
**tests and docs**: 
- Add: langchain/docs/docs/integrations/llms/ipex_llm_gpu.ipynb
- Update: langchain/docs/docs/integrations/llms/ipex_llm_gpu.ipynb
- Update: langchain/libs/community/tests/llms/test_ipex_llm.py

---------

Co-authored-by: ivy-lv11 <zhicunlv@gmail.com>
2024-09-02 08:49:08 -04:00
Bagatur
d19e074374 core[patch]: handle serializable fields that cant be converted to bool (#25903) 2024-09-01 16:44:33 -07:00
Kirushikesh DB
7f857a02d5 docs: HuggingFace pipeline returns the prompt if return_full_text is not set (#25916)
**Description:**
The current documentation for using Hugging Face pipelines with LangChain
needs to set `return_full_text=False`; otherwise the pipeline by default
returns both the prompt and the response as output.


Code to reproduce:
```python
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain_core.messages import (
    HumanMessage,
    SystemMessage,
)

llm = HuggingFacePipeline.from_model_id(
    model_id="microsoft/Phi-3.5-mini-instruct",
    task="text-generation",
    pipeline_kwargs=dict(
        max_new_tokens=512,
        do_sample=False,
        repetition_penalty=1.03,
        # return_full_text=False
    ),
    device=0
)

chat_model = ChatHuggingFace(llm=llm)

messages = [
    SystemMessage(content="You're a helpful assistant"),
    HumanMessage(
        content="What happens when an unstoppable force meets an immovable object?"
    ),
]

ai_msg = chat_model.invoke(messages)
print(ai_msg.content)
```
Output:
```
<|system|>
You're a helpful assistant<|end|>
<|user|>
What happens when an unstoppable force meets an immovable object?<|end|>
<|assistant|>
 The scenario of an "unstoppable force" meeting an "immovable object" is a classic paradox that has puzzled philosophers, scientists, and thinkers for centuries. In physics, however, there are no such things as truly unstoppable forces or immovable objects because all physical entities have mass and interact with other masses through fundamental forces (like gravity).

When we consider the laws of motion, particularly Newton's third law which states that for every action, there is an equal and opposite reaction, it becomes clear that if one were to exist, the other would necessarily be negated by the interaction. For example, if you push against a solid wall with great force, the wall exerts an equal and opposite force back on you, preventing your movement.

In theoretical discussions, this paradox often serves as a thought experiment to explore concepts like determinism versus free will, the limits of physical laws, and the nature of reality itself. However, in practical terms, any force applied to an object will result in some form of deformation, transfer of energy, or movement, depending on the properties of both the force and the object.

So while the idea of an unstoppable force and an immovable object remains a fascinating philosophical conundrum, it does not hold up under the scrutiny of physical laws as we understand them.
```

---------

Co-authored-by: Kirushikesh D B kirushi@ibm.com <kirushi@cccxl012.pok.ibm.com>
2024-09-01 13:52:20 -07:00
Yuxi Zheng
38dfde6946 docs: fix typo in Cassandra for ./cookbook/cql_agent.ipynb (#25922)
Co-authored-by: "syd" <zheng.yuxi@outlook.com>
2024-09-01 20:51:47 +00:00
Borahm Lee
9cdb99bd60 docs: remove unused imports in Tutorials Basics (#25919)
## Description

- `List` is imported but never used, so the unnecessary imports are
removed.
2024-09-01 20:51:00 +00:00
Erick Friis
8732cfc6ef docs: review process gh discussion (#25921) 2024-09-01 17:20:46 +00:00
Erick Friis
08b9715845 docs: pr review process (#25899)
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
2024-09-01 16:51:12 +00:00
ccurme
60054db1c4 infra[patch]: remove together from scheduled tests (#25909)
These now run in https://github.com/langchain-ai/langchain-together
2024-08-31 18:43:16 +00:00
Emmanuel Leroy
654da27255 improve llamacpp embeddings (#12972)
- **Description:**
Improve the llamacpp embedding class by adding the `device` parameter so it
can be passed to the model and used with `gpu`, `cpu`, or Apple Metal
(`mps`).
Improve performance by making use of the bulk client API to compute
embeddings in batches.
  
  - **Dependencies:** none
  - **Tag maintainer:** 
@hwchase17

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-31 18:27:59 +00:00
Sandeep Bhandari
f882824eac Update tool_choice.ipynb spelling mistake of select (#25907)
2024-08-31 12:36:32 +00:00
ℍ𝕠𝕝𝕝𝕠𝕨 𝕄𝕒𝕟
64b62f6ae4 community[neo4j_vector]: make embedding dimension check optional (#25737)
**Description:**

Starting from Neo4j 5.23 (22 August 2024), with vector-2.0 indexes,
`vector.dimensions` is not required to be set, which causes a key-not-found
error in the index config when it's absent.

Since the existence of vector.dimensions only enables additional checks,
this commit makes the embedding dimension check optional, performing it
only when the value exists (is not None), as in the sketch below.

https://neo4j.com/release-notes/database/neo4j-5/
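A hedged sketch of the adjusted logic (helper and variable names are hypothetical):

```python
def check_dimension(index_config: dict, embedding: list[float]) -> None:
    # Only validate when the index actually reports a dimension
    # (absent on Neo4j 5.23 vector-2.0 indexes):
    dimension = index_config.get("vector.dimensions")
    if dimension is not None and dimension != len(embedding):
        raise ValueError("Embedding dimension does not match index configuration.")


check_dimension({"vector.dimensions": 3}, [0.1, 0.2, 0.3])  # passes
check_dimension({}, [0.1, 0.2, 0.3])                        # passes: check skipped
```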

**Twitter handle:** @HollowM186

Signed-off-by: Hollow Man <hollowman@opensuse.org>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-31 12:36:20 +00:00
Christophe Bornet
0a752a74cc community[patch], docs: Add API reference doc for GraphVectorStore (#25751) 2024-08-30 17:42:00 -07:00
Bagatur
28e2ec7603 ollama[patch]: Release 0.1.3 (#25902) 2024-08-31 00:11:45 +00:00
Bagatur
ca1c3bd9c0 community[patch]: bump + fix core dep (#25901) 2024-08-30 15:54:07 -07:00
Bagatur
fabe32c06d core[patch]: Release 0.2.37 (#25900) 2024-08-30 22:29:12 +00:00
Richmond Alake
9992a1db43 cookbook: AI Agent Built With LangChain and FireWorksAI (#22609)
- **AI Agent Built With LangChain and FireWorksAI**: "community
notebook"
- **Description:** Added a new AI agent in the cookbook folder that
integrates prompt compression using LLMLingua and arXiv retrieval tools.
The agent is designed to optimize the efficiency and performance of
research tasks by compressing lengthy prompts and retrieving relevant
academic papers. The agent also makes use of MongoDB to store
conversational history and as its knowledge base, using the MongoDB vector
store.
    - **Twitter handle:** https://x.com/richmondalake

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-08-30 22:19:17 +00:00
mehdiosa
c6f00e6bdc community: Fix branch not being considered when using GithubFileLoader (#20075)
- **Description:** Added the `ref` query parameter so data is loaded not
only from the default branch but from any branch passed (see the usage sketch below)
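
A hedged usage sketch (the token and `branch` value are placeholders; the `branch` parameter follows this PR's description, and other parameters follow the loader's existing API):

```
from langchain_community.document_loaders import GithubFileLoader

loader = GithubFileLoader(
    repo="langchain-ai/langchain",
    access_token="<github-token>",  # placeholder
    branch="v0.2",  # now forwarded to the API as the `ref` query parameter
    file_filter=lambda path: path.endswith(".md"),
)
docs = loader.load()
```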

---------

Co-authored-by: Osama Mehdi <mehdi@hm.edu>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-30 21:47:11 +00:00
Leonid Ganeline
54d2b861f6 docs: integrations reference updates 12 (#25676)
Added missed provider pages and links. Fixed inconsistent formatting.

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-30 21:25:42 +00:00
Aditya
c8b1c3a7e7 docs: update documentation for Vertex Embeddings Models (#25745)
- **Description:** update documentation for Vertex Embeddings Models
    - **Issue:** NA
    - **Dependencies:** NA
    - **Twitter handle:** NA

---------

Co-authored-by: adityarane@google.com <adityarane@google.com>
2024-08-30 13:58:21 -07:00
Alex Sherstinsky
617a4e617b community: Fix a bug in handling kwargs overwrites in Predibase integration, and update the documentation. (#25893)
Thank you for contributing to LangChain!

- [x] **PR title**: "package: description"
- Where "package" is whichever of langchain, community, core,
experimental, etc. is being modified. Use "docs: ..." for purely docs
changes, "templates: ..." for template changes, "infra: ..." for CI
changes.
  - Example: "community: add foobar LLM"


- [x] **PR message**: ***Delete this entire checklist*** and replace
with
    - **Description:** a description of the change
    - **Issue:** the issue # it fixes, if applicable
    - **Dependencies:** any dependencies required for this change
- **Twitter handle:** if your PR gets announced, and you'd like a
mention, we'll gladly shout you out!


- [x] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.
2024-08-30 12:41:42 -07:00
Erick Friis
28f6ff6fcd docs: remove incorrect vectorstore local column (#25895) 2024-08-30 18:54:51 +00:00
Anush
ade4bfdff1 qdrant: Updated class check in Self-Query Retriever factory (#25877)
## Description

- Updates the self-query retriever factory to check for the new Qdrant
vector store class. i.e. `langchain_qdrant.QdrantVectorstore`.
- Deprecates `QdrantSparseVectorRetriever`, since the vector store
implementation natively supports it now.

Resolves #25798
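
Roughly, the updated dispatch looks like this (a sketch; the real factory covers many store types, and the translator import path shown is an assumption):

```
def _select_translator(vectorstore):
    try:
        from langchain_qdrant import QdrantVectorStore  # new class checked first
    except ImportError:
        QdrantVectorStore = None
    if QdrantVectorStore is not None and isinstance(vectorstore, QdrantVectorStore):
        from langchain.retrievers.self_query.qdrant import QdrantTranslator

        return QdrantTranslator(metadata_key="metadata")  # default payload key
    raise ValueError(f"Unsupported vector store: {type(vectorstore)}")
```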
2024-08-30 12:11:55 -04:00
Djordje
862ef32fdc community: Fixed infinity embeddings async request (#25882)
**Description:** Fix async infinity embeddings
**Issue:** #24942  

@baskaryan, @ccurme
2024-08-30 12:10:34 -04:00
rainsubtime
f75d5621e2 community: Fix a bug of LLM in moonshot (#25878)
- **Description:** When using the Moonshot LLM integration, the error
"'Moonshot' object has no attribute '_client'" occurs because `_client`
is treated as private in pydantic v1 and therefore cannot be used as a
model field. Renaming `_client` to `client` resolves the error (see the
sketch below).
- **Issue:** the issue #24390
- **Dependencies:** none
- **Twitter handle:** @Rainsubtime
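
A minimal sketch of why the rename matters, assuming pydantic v1 semantics (reproduced here through pydantic 2's `pydantic.v1` shim):

```
from pydantic.v1 import BaseModel

class Demo(BaseModel):
    _client: str = "hidden"   # leading underscore: treated as private, not a model field
    client: str = "visible"   # regular model field

print(Demo().__fields__.keys())  # dict_keys(['client']) -- `_client` is absent
```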




- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Co-authored-by: Cyue <Cyue_work2001@163.com>
2024-08-30 16:09:39 +00:00
ZhangShenao
fd0f147df3 Improvement[Community] Add tool-calling test case for ChatZhipuAI (#25884)
- Add tool-calling test case for `ChatZhipuAI`
2024-08-30 12:05:43 -04:00
k.muto
5bb810c5c6 docs: updated args_schema to be required when using callback handlers in custom tools. (#25887)
- **Description:** if you use callback handlers with a custom tool,
`run_manager` is added to the input, so you need to explicitly specify
`args_schema`. This was not documented, which was confusing, so I added
it. Also, it seems that the type does not work with pydantic.BaseModel.
- **Issue:** None
- **Dependencies:** None
2024-08-30 12:04:40 -04:00
默奕
6377185291 add neo4j query constructor for self query (#25288)
- [x] **PR title - community: add neo4j query constructor for self
query**

- [x] **PR message**
- **Description:** adding a Neo4jTranslator so that the Neo4j vector
database can use SelfQueryRetriever
    - **Issue:** this issue had been raised before in #19748
    - **Dependencies:** none. 
    - **Twitter handle:** @moyi_dang
- P.S. I have not added the query constructor to BUILTIN_TRANSLATORS in
this PR; I want to make changes to only one package at a time.

- [x] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-30 14:54:33 +00:00
Ohad Eytan
b5d670498f partners/milvus: allow creating a vectorstore with sparse embeddings (#25284)
# Description
Milvus (and `pymilvus`) recently added the option to use [sparse
vectors](https://milvus.io/docs/sparse_vector.md#Sparse-Vector) with
appropriate search methods (e.g., `SPARSE_INVERTED_INDEX`) and
embeddings (e.g., `BM25`, `SPLADE`).

This PR allows creating a vector store using LangChain's `Milvus` class,
setting the matching vector field type to `DataType.SPARSE_FLOAT_VECTOR`
and the default index type to `SPARSE_INVERTED_INDEX`.

It only extends functionality and is backward compatible.
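
A sketch of the field-type mapping described above (the dense defaults shown are assumptions, not necessarily the library's values):

```
from pymilvus import DataType

def vector_field_type(is_sparse: bool) -> DataType:
    # Sparse embeddings map to SPARSE_FLOAT_VECTOR; dense ones keep FLOAT_VECTOR.
    return DataType.SPARSE_FLOAT_VECTOR if is_sparse else DataType.FLOAT_VECTOR

def default_index_type(is_sparse: bool) -> str:
    # SPARSE_INVERTED_INDEX becomes the default index for sparse fields.
    return "SPARSE_INVERTED_INDEX" if is_sparse else "HNSW"
```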

## Note
I am also interested in extending the Milvus class further to support
multi-vector search (aka hybrid search) and will be happy to discuss
that. See
[here](https://github.com/langchain-ai/langchain/discussions/19955),
[here](https://github.com/langchain-ai/langchain/pull/20375), and
[here](https://github.com/langchain-ai/langchain/discussions/22886) for
similar needs.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-30 02:30:23 +00:00
Erick Friis
09b04c7e3b "community: release 0.2.15" (#25867) 2024-08-30 02:18:48 +00:00
Erick Friis
f7e62754a1 community: undo azure_ad_access_token breaking change (#25818) 2024-08-30 02:06:14 +00:00
Leonid Ganeline
6047138379 docs: arxiv reference updates (#24949)
Added: arxiv references to the concepts page.
Regenerated: arxiv references page.
Improved: formatting of the concepts page (moved the Partner packages
section after langchain_community)
2024-08-29 18:51:18 -07:00
Bagatur
1759ff5836 infra: rm together langchain test dp (#25866) 2024-08-30 00:59:53 +00:00
Erick Friis
24f0c232fe docs: elastic feature (#25865) 2024-08-30 00:55:16 +00:00
Erick Friis
1640872059 together: mv to external repo (#25863) 2024-08-29 16:42:59 -07:00
Michael Paciullo
e7c856c298 langchain_openai: Add "strict" parameter to OpenAIFunctionsAgent (#25862)
- **Description:** OpenAI recently introduced a "strict" parameter for
[structured outputs in their
API](https://openai.com/index/introducing-structured-outputs-in-the-api/).
An optional `strict` parameter has been added to
`create_openai_functions_agent()` and `create_openai_tools_agent()` so
developers can use this feature in those agents.
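
A hedged usage sketch per this description (assumes `OPENAI_API_KEY` is set; the model name and tool are illustrative):

```
from langchain.agents import create_openai_tools_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant"),
        ("human", "{input}"),
        MessagesPlaceholder("agent_scratchpad"),
    ]
)
# `strict=True` opts in to OpenAI's strict structured-output schemas.
agent = create_openai_tools_agent(ChatOpenAI(model="gpt-4o-mini"), [add], prompt, strict=True)
```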

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-29 22:27:07 +00:00
Bagatur
fabd3295fa core[patch]: don't mutate merged lists/dicts (#25858)
Update merging utils to
- not mutate objects
- have special handling for 'type' keys in dicts (sketch below)
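
A sketch of the intended behavior (illustrative, not the actual langchain_core implementation):

```
def merge_dicts(left: dict, right: dict) -> dict:
    merged = dict(left)  # copy instead of mutating the input
    for key, value in right.items():
        if key not in merged or merged[key] is None:
            merged[key] = value
        elif isinstance(merged[key], str):
            if key == "type" and merged[key] == value:
                continue  # special case: identical 'type' values are kept, not concatenated
            merged[key] += value  # streamed string chunks concatenate
        else:
            merged[key] = value
    return merged

left = {"type": "text", "text": "Hello"}
assert merge_dicts(left, {"type": "text", "text": " world"}) == {"type": "text", "text": "Hello world"}
assert left == {"type": "text", "text": "Hello"}  # input not mutated
```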
2024-08-29 20:34:54 +00:00
Kyle Winkelman
09c2d8faca langchain_openai: Cleanup OpenAIEmbeddings validate_environment. (#25855)
**Description:** [This portion of
code](https://github.com/langchain-ai/langchain/blob/v0.1.16/libs/partners/openai/langchain_openai/embeddings/base.py#L189-L196)
has no use, as a couple of lines later a [`ValueError` is
thrown](https://github.com/langchain-ai/langchain/blob/v0.1.16/libs/partners/openai/langchain_openai/embeddings/base.py#L209-L213).
**Issue:** A follow-up to #25852.
2024-08-29 13:54:43 -04:00
Kyle Winkelman
201bdf7148 community: Cap AzureOpenAIEmbeddings chunk_size at 2048 instead of 16. (#25852)
**Description:** Within AzureOpenAIEmbeddings there is a validation to
cap `chunk_size` at 16. The value of 16 is either an old limitation or
was erroneously chosen. I have checked all of the `preview` and `stable`
releases to ensure that the `embeddings` endpoint can handle 2048
entries
[Azure/azure-rest-api-specs](https://github.com/Azure/azure-rest-api-specs/tree/main/specification/cognitiveservices/data-plane/AzureOpenAI/inference).
I have also found many locations that confirm this limit should be 2048:
-
https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
-
https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits

**Issue:** fixes #25462
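
The practical effect on request count, as a back-of-the-envelope sketch:

```
texts = [f"document {i}" for i in range(10_000)]
chunk_size = 2048  # new cap; the old cap of 16 forced far more requests
batches = [texts[i : i + chunk_size] for i in range(0, len(texts), chunk_size)]
assert len(batches) == 5  # 5 embeddings requests at chunk_size=2048
# at the old cap: 10_000 / 16 = 625 requests for the same workload
```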
2024-08-29 16:48:04 +00:00
Leonid Ganeline
08c9c683a7 docs: integrations reference updates 6 (#25188)
Added missed provider pages. Added missed references to the integration
components.
2024-08-29 09:17:41 -07:00
Allan Ascencio
a8af396a82 added octoai test (#21793)
- [ ] **PR title**: community: add tests for ChatOctoAI

- [ ] **PR message**: 
Description: Added unit tests for the ChatOctoAI class in the community
package to ensure proper validation and default values. These tests
verify the correct initialization of fields, the handling of missing
required parameters, and the proper setting of aliases.
Issue: N/A
Dependencies: None

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
2024-08-29 15:07:27 +00:00
Param Singh
69f9acb60f premai[patch]: Standardize premai params (#21513)
Thank you for contributing to LangChain!

community:premai[patch]: standardize init args

- updated `temperature` with Pydantic Field, updated the unit test.
- updated `max_tokens` with Pydantic Field, updated the unit test.
- updated `max_retries` with Pydantic Field, updated the unit test.

Related to #20085

---------

Co-authored-by: Isaac Francisco <78627776+isahers1@users.noreply.github.com>
Co-authored-by: ccurme <chester.curme@gmail.com>
2024-08-29 11:01:28 -04:00
Guangdong Liu
fcf9230257 community(sparkllm): Add function call support in Sparkllm chat model. (#20607)
- **Description:** Add function call support in Sparkllm chat model.
Related documents
https://www.xfyun.cn/doc/spark/Web.html#_2-function-call%E8%AF%B4%E6%98%8E
- @baskaryan

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
2024-08-29 14:38:39 +00:00
ChengZi
37f5ba416e partners[milvus]: fix issue when metadata_schema is None (#25836)
fix issue when metadata_schema is None

Signed-off-by: ChengZi <chen.zhang@zilliz.com>
2024-08-29 10:11:09 -04:00
ccurme
426333ff6f infra[patch]: remove AI21 from scheduled tests (#25847)
These now run in https://github.com/langchain-ai/langchain-ai21
2024-08-29 14:03:20 +00:00
Jorge Piedrahita Ortiz
9ac953a948 Community: sambastudio embeddings GenericV2 API support (#25064)
- **Description:** 
        SambaStudio GenericV2 API support 
        Minor changes for requests error handling
2024-08-29 09:52:49 -04:00
Sam Jove
bdce9a47d0 community[patch]: callback before yield for _astream (gigachat) (#25834)
Description: Moves yield to after callback for _astream for gigachat in
the community package
Issue: #16913

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-29 13:29:28 +00:00
Jinoos Lee
703af9ffe3 Patch enable to use Amazon OpenSearch Serverless(aoss) for Semantic Cache store (#25833)
- [x] **PR title**: "community: Patch enable to use Amazon OpenSearch
Serverless for Semantic Cache store"

- [x] **PR message**: 
- **Description:** The OpenSearchSemanticCache class now supports Amazon
OpenSearch Serverless as a Semantic Cache store; it only requires
passing the auth (`http_auth`) parameter to the initializer (usage
sketch below)
    - **Dependencies:** none
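
A hedged usage sketch (the `http_auth` passthrough follows this description; the signer setup, embedding stand-in, and collection URL are assumptions):

```
import boto3
from opensearchpy import AWSV4SignerAuth
from langchain_community.cache import OpenSearchSemanticCache
from langchain_community.embeddings import FakeEmbeddings

auth = AWSV4SignerAuth(boto3.Session().get_credentials(), "us-east-1", "aoss")
cache = OpenSearchSemanticCache(
    opensearch_url="https://<collection-id>.us-east-1.aoss.amazonaws.com",  # placeholder
    embedding=FakeEmbeddings(size=1536),  # stand-in for a real embedding model
    http_auth=auth,
)
```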

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.

---------

Co-authored-by: Jinoos Lee <jinoos@amazon.com>
2024-08-29 13:28:22 +00:00
William FH
1ad621120d docs: Update langgraph 0.2.0 checkpointer import path (#25205)
And fix the description for timeout
2024-08-28 19:32:08 -07:00
Andrew Benton
c410545075 docs: add self-hosting row to code interpreter tools table (#25303)
**Description:** Add information about self-hosting support to the code
interpreter tools table.
**Issue:** N/A
**Dependencies:** N/A
2024-08-28 19:30:12 -07:00
Eugene Yurtsev
83327ac43a docs: Fix typo in openai llm integration notebook (#25492)
Fix typo in openai LLM integration notebook.
2024-08-28 19:22:57 -07:00
Leonid Ganeline
31f55781b3 docs: added ColBERT reference (#25452)
Added references to the source papers.
Fixed URL verification code.
Improved arXive page formatting.
Regenerated arXiv page.
2024-08-28 19:05:44 -07:00
Mikhail Khludnev
a017f49fd3 community[patch]: fix #25575 YandexGPTs for _grpc_metadata (#25617)
it fixes two issues:

### YGPTs are broken #25575

```
File ....conda/lib/python3.11/site-packages/langchain_community/embeddings/yandex.py:211, in _make_request(self, texts, **kwargs)
..
--> 211 res = stub.TextEmbedding(request, metadata=self._grpc_metadata)  # type: ignore[attr-defined]

AttributeError: 'YandexGPTEmbeddings' object has no attribute '_grpc_metadata'
```
My gut feeling is that #23841 is the cause.

I had to drop the leading underscore from `_grpc_metadata` as a quick
fix, but I don't know how to do it in a properly _pydantic_ way.

### minor issue:

if we use `api_key`, which is not the best practice, the code fails with

```
File ~/git/...../python3.11/site-packages/langchain_community/embeddings/yandex.py:119, in YandexGPTEmbeddings.validate_environment(cls, values)
...

AttributeError: 'tuple' object has no attribute 'append'
```

- Added a new integration test, but it requires a YandexGPT environment
and an active account; I don't know how integration tests are
enabled/disabled in CI.
- Added small unit tests with mocks; these should be fine.

---------

Co-authored-by: mikhail-khludnev <mikhail_khludnev@rntgroup.com>
2024-08-28 18:48:10 -07:00
Serena Ruan
850bf89e48 community[patch]: Support passing extra params for executing functions in UCFunctionToolkit (#25652)
Thank you for contributing to LangChain!

- [x] **PR title**: "package: description"
- Where "package" is whichever of langchain, community, core,
experimental, etc. is being modified. Use "docs: ..." for purely docs
changes, "templates: ..." for template changes, "infra: ..." for CI
changes.
  - Example: "community: add foobar LLM"


Support passing extra params when executing UC functions: the params
should be a dictionary with the key EXECUTE_FUNCTION_ARG_NAME. The
assumption is that the function itself doesn't use such a variable name
(starting and ending with double underscores); if it does, we raise an
Exception. If invalid params are passed to execute_statement, we raise
an Exception as well. (A sketch of this validation follows below.)
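
A sketch of the described validation (the constant's value and the allowed execute_statement arguments are assumptions):

```
EXECUTE_FUNCTION_ARG_NAME = "__execution_args__"  # assumed dunder-style key
ALLOWED_EXECUTE_STATEMENT_ARGS = {"wait_timeout", "row_limit", "byte_limit"}  # illustrative

def split_params(params: dict, function_param_names: set) -> tuple:
    if EXECUTE_FUNCTION_ARG_NAME in function_param_names:
        raise ValueError(
            f"Function must not declare a parameter named {EXECUTE_FUNCTION_ARG_NAME}"
        )
    remaining = dict(params)
    execute_args = remaining.pop(EXECUTE_FUNCTION_ARG_NAME, {})
    invalid = set(execute_args) - ALLOWED_EXECUTE_STATEMENT_ARGS
    if invalid:
        raise ValueError(f"Invalid execute_statement arguments: {invalid}")
    return remaining, execute_args
```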


- [x] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.

---------

Signed-off-by: Serena Ruan <serena.rxy@gmail.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-08-28 18:47:32 -07:00
崔浩
3555882a0d community[patch]: optimize xinference llm import (#25809)
Thank you for contributing to LangChain!

- [ ] **PR title**: "community: optimize xinference llm import"

- [ ] **PR message**: 
- **Description:** from xinferece_client import RESTfulClient when there
is no importing xinference.
    - **Dependencies:** xinferece_client
- **Why do so:** the total xinference(pip install xinference[all]) is
too heavy for installing, let alone it is useless for langchain user
except RESTfulClient. The modification has maintained consistency with
the xinference embeddings
[embeddings/xinference](../blob/master/libs/community/langchain_community/embeddings/xinference.py#L89).
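
A sketch of the lazy-import fallback (structure per this description; the error wording is illustrative):

```
def _get_restful_client(server_url: str):
    try:
        from xinference.client import RESTfulClient  # full package, if installed
    except ImportError:
        try:
            from xinference_client import RESTfulClient  # lightweight client
        except ImportError as e:
            raise ImportError(
                "Could not import RESTfulClient. "
                "Install it with `pip install xinference-client`."
            ) from e
    return RESTfulClient(server_url)
```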
2024-08-29 01:41:43 +00:00
Michael Rubél
9decd0b243 langchain[patch]: fix moderation chain init (#25778)
[This
commit](d3ca2cc8c3)
broke the moderation chain, so we faced a crash when migrating LangChain
from v0.1 to v0.2.

The issue appears to be that the class attribute the code refers to
doesn't hold the value processed in the `validate_environment` method.
We had `extras={}` in this attribute, and it was cast to `True` when it
should've been `False`. Adding a simple assignment seems to resolve the
issue, though I'm not sure it's the right way.

---

---------

Co-authored-by: Michael Rubél <mrubel@oroinc.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-08-28 18:41:31 -07:00
Madhu Shantan
63a1569d5f docs: fixed syntax error in ChatAnthropic Example - rag app tutorial notebook (#25824)
Thank you for contributing to LangChain!

- [ ] **PR title**: docs: fixed syntax error in ChatAnthropic Example -
rag app tutorial notebook - generation


- [ ] **PR message**: 
- **Description:** Fixed a syntax error in the ChatAnthropic
initialization example in the RAG tutorial notebook. The original code
had an extra set of quotation marks around the model parameter, which
would cause a Python syntax error. The corrected version removes these
unnecessary quotes.
 
- **Dependencies:** No new dependencies required for this documentation
fix.
I've verified that the corrected code is syntactically valid and matches
the expected format for initializing a ChatAnthropic instance in
LangChain.
    - **Twitter handle:** madhu_shantan


- [ ] **Add tests and docs**: the error in the Jupyter notebook:
[screenshot: error cell](https://github.com/user-attachments/assets/07148a93-300f-40e2-ad4a-ac219cbb56a4)

the corrected cell:
[screenshot: corrected cell](https://github.com/user-attachments/assets/75b1455a-3671-454e-ac16-8ca77c049dbd)



- [ ] **Lint and test**: As this is a documentation-only change, I have
not run the full test suite. However, I have verified that the corrected
code example is syntactically valid and matches the expected usage of
the ChatAnthropic class.

the error in the docs is here:
[screenshot: docs error](https://github.com/user-attachments/assets/812ccb20-b411-4a5b-afc1-41742efb32a7)
2024-08-29 01:31:01 +00:00
Erick Friis
e5ae988505 prompty: bump core version (#25831) 2024-08-28 23:06:13 +00:00
Erick Friis
c8b8335b82 core: prompt variable error msg (#25787) 2024-08-28 22:54:00 +00:00
ccurme
ff168aaec0 prompty: release 0.0.3 (#25830) 2024-08-28 15:52:17 -07:00
Matthieu
783397eacb community: avoid double templating in langchain_prompty (#25777)
## Description

In `langchain_prompty`, messages are templated by Prompty. However, a
call to `ChatPromptTemplate` was initiating a second templating. We now
convert parsed messages to `Message` objects before calling
`ChatPromptTemplate`, signifying clearly that they are already
templated.

We also revert #25739, which applied to this second templating (now
avoided) and did not fix the original issue.

## Issue

Closes #25703
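
The underlying idea, as a minimal sketch (message contents are illustrative): concrete message objects are treated as literal by `ChatPromptTemplate`, so no second templating occurs.

```
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate

rendered = [
    SystemMessage(content="Already rendered by Prompty: {not_a_variable}"),
    HumanMessage(content="Hello"),
]
prompt = ChatPromptTemplate.from_messages(rendered)
print(prompt.format_messages())  # the braces survive intact
```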
2024-08-28 18:18:18 -04:00
ccurme
afe8ccaaa6 community[patch]: Add ID field back to Azure AI Search results (#25828)
Commandeering https://github.com/langchain-ai/langchain/pull/23243 as
maintainers don't have ability to modify that PR.

Fixes https://github.com/langchain-ai/langchain/issues/22827

---------

Co-authored-by: Ming Quah <fleetadmiralbutter@icloud.com>
2024-08-28 17:56:50 -04:00
rbrugaro
9fa172bc26 add links in example nb with tei/tgi references (#25821)
I have validated that the LangChain interface with TEI/TGI works as
expected when TEI and TGI are running on Intel Gaudi2. Added some
references to notebooks to help users find relevant info.

---------

Co-authored-by: Rita Brugarolas <rbrugaro@idc708053.jf.intel.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-28 21:33:25 +00:00
Erick Friis
8fb594fd2a ai21: migrate to external repo (#25827) 2024-08-28 14:24:07 -07:00
Erick Friis
095b712a26 ollama: bump core version (#25826) 2024-08-28 12:31:16 -07:00
Erick Friis
5db6c6d96d community: release 0.2.14 (#25822) 2024-08-28 19:05:53 +00:00
Erick Friis
d6c4803ab0 core: release 0.2.36 (#25819) 2024-08-28 18:04:51 +00:00
Erick Friis
5186325bc7 partners/ollama: release 0.1.2 (#25817)
release for #25697
2024-08-28 17:47:32 +00:00
Rohit Gupta
aff50a1e6f milvus: add array data type for collection create (#23219)
Add the array data type for Milvus vector store collection creation


Thank you for contributing to LangChain!

- [x] **PR title**: "package: description"
- Where "package" is whichever of langchain, community, core,
experimental, etc. is being modified. Use "docs: ..." for purely docs
changes, "templates: ..." for template changes, "infra: ..." for CI
changes.
  - Example: "community: add foobar LLM"


- [x] **PR message**: ***Delete this entire checklist*** and replace
with
    - **Description:** a description of the change
    - **Issue:** the issue # it fixes, if applicable
    - **Dependencies:** any dependencies required for this change
- **Twitter handle:** if your PR gets announced, and you'd like a
mention, we'll gladly shout you out!


- [x] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.

---------

Signed-off-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Rohit Gupta <rohit.gupta2@walmart.com>
Co-authored-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-28 16:55:57 +00:00
Cillian Berragan
754f3c41f9 community: add score to PineconeHybridSearchRetriever (#25781)
**Description:**

Adds the 'score' returned by Pinecone to the
`PineconeHybridSearchRetriever` list of returned Documents.

There is currently no way to return the score when using Pinecone hybrid
search, so in this PR I include it by default.
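
A sketch of attaching the score to each document (the field names mirror typical Pinecone query results and are assumptions, not the merged code):

```
from langchain_core.documents import Document

def to_documents(matches: list) -> list:
    return [
        Document(
            page_content=m["metadata"].get("context", ""),
            metadata={**m["metadata"], "score": m["score"]},  # score now included
        )
        for m in matches
    ]
```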

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-28 13:11:06 +00:00
ZhangShenao
3f1d652f15 Improvement[Community] Improve api doc for PineconeHybridSearchRetriever (#25803)
- Complete missing args in api doc
2024-08-28 08:38:56 -04:00
Moritz Schlager
555f97becb community[patch]: fix model initialization bug for deepinfra (#25727)
### Description
Adds an init method to ChatDeepInfra to set the model_name attribute
according to the argument.
### Issue
Currently, the model_name specified by the user during initialization of
the ChatDeepInfra class is never set, so it always chooses the default
model (meta-llama/Llama-2-70b-chat-hf; since this is deprecated, it
probably always uses meta-llama/Llama-3-70b-Instruct instead). We
stumbled across this issue and fixed it as proposed in this pull
request. Feel free to change the fix according to your coding guidelines
and style; this is just a proposal, and we want to draw attention to
this problem.
### Dependencies
no additional dependencies required

Feel free to contact me or @timo282 and @finitearth if you have any
questions.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
2024-08-28 02:02:35 -07:00
Bagatur
a052173b55 together[patch]: Release 0.1.6 (#25805) 2024-08-28 02:01:49 -07:00
Bagatur
b0ac6fe8d3 community[patch]: Release 0.2.13 (#25806) 2024-08-28 08:57:49 +00:00
Bagatur
85aef7641c openai[patch]: Release 0.1.23 (#25804) 2024-08-28 08:52:08 +00:00
Bagatur
0d3fd0aeb9 langchain[patch]: Release 0.2.15 (#25802) 2024-08-28 08:35:00 +00:00
zysoong
25a6790e1a community[patch]: Minor Improvement of extract hyperlinks tool output (#25728)
**Description:** Make each hyperlink appear only once in the
extract_hyperlinks tool output (sketch below). For some websites the
output contains the meaningless '#' hyperlink many times, which inflates
the context window token count without any benefit.
**Issue:** None
**Dependencies:** None
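
A minimal sketch of the deduplication (illustrative, not the merged code):

```
def dedupe_hyperlinks(hrefs: list) -> list:
    seen = set()
    unique = []
    for href in hrefs:
        if href not in seen:  # keep only the first occurrence
            seen.add(href)
            unique.append(href)
    return unique

assert dedupe_hyperlinks(["#", "#", "/docs", "#"]) == ["#", "/docs"]
```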
2024-08-28 08:02:40 +00:00
Christophe Bornet
ff0df5ea15 core[patch]: Add B(bugbear) ruff rules (#25520)
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-08-28 07:09:29 +00:00
Isaac Francisco
d5ddaac1fc docs minor fix (#25794) 2024-08-28 04:14:36 +00:00
ccurme
3c784e10a8 docs: improve docs for InMemoryVectorStore (#25786)
Closes https://github.com/langchain-ai/langchain/issues/25775
2024-08-27 21:12:32 -07:00
Erick Friis
1023fbc98a databricks: mv to partner repo (#25788) 2024-08-27 18:51:17 -07:00
ccurme
2e5c379632 openai[patch]: fix get_num_tokens for function calls (#25785)
Closes https://github.com/langchain-ai/langchain/issues/25784

See additional discussion
[here](0a4ee864e9 (r145147380)).
2024-08-27 20:18:19 +00:00
Erick Friis
2aa35d80a0 docs, infra: cerebras docs, update docs template linting with better error (#25782) 2024-08-27 17:19:59 +00:00
venkatram-dev
48b579f6b5 date_time_parser (#25763)
Thank you for contributing to LangChain!

- [x] **PR title**: "langchain: Chains: query_constructor: add date time
parser"

- [x] **PR message**: 
- **Description:** add date time parser to langchain Chains
query_constructor
    - **Issue:** https://github.com/langchain-ai/langchain/issues/25526


- [ ] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [ ] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.
2024-08-27 13:18:52 -04:00
Tomaz Bratanic
f359e6b0a5 Add mmr to neo4j vector (#25765) 2024-08-27 08:55:19 -04:00
pazshalev
995305fdd5 test: fix tool calling integration tests for AI21 Jamba models (#25771)
Ignore specific integration tests that handle specific tool-calling
cases that will soon be fixed.
2024-08-27 08:54:51 -04:00
Luis Valencia
99f9a664a5 community: Azure Search Vector Store is missing Access Token Authentication (#24330)
Added Azure Search Access Token Authentication instead of API KEY auth.
Fixes Issue: https://github.com/langchain-ai/langchain/issues/24263
Dependencies: None
Twitter: @levalencia

@baskaryan

Could you please review? First time creating a PR that fixes some code.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-26 15:41:50 -07:00
Leonid Ganeline
49b0bc7b5a docs: integrations reference updates 5 (#25151)
Added missed references. Added missed provider pages.

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-26 15:12:39 -04:00
ZhangShenao
44e3e2391c Improvement[Community] Improve methods in IMessageChatLoader (#25746)
- Add @staticmethod to static methods in `IMessageChatLoader`.
- Format argument names.
2024-08-26 14:20:22 -04:00
Erick Friis
815f59dba5 partners/ai21: release 0.1.8 (#25759) 2024-08-26 18:02:43 +00:00
amirai21
17dffd9741 AI21: tools calling support in Langchain (#25635)
This pull request introduces support for the AI21 tools calling feature,
available in the Jamba-1.5 models. When Jamba-1.5 detects the necessity
to invoke a provided tool, as indicated by the 'tools' parameter passed
to the model:

```
from __future__ import annotations

from typing import Any, Dict, List, Literal

from typing_extensions import Required, TypedDict

class ToolDefinition(TypedDict, total=False):
    type: Required[Literal["function"]]
    function: Required[FunctionToolDefinition]

class FunctionToolDefinition(TypedDict, total=False):
    name: Required[str]
    description: str
    parameters: ToolParameters

class ToolParameters(TypedDict, total=False):
    type: Literal["object"]
    properties: Required[Dict[str, Any]]
    required: List[str]
```

It will respond with a list of tool calls structured as follows:

```
from typing import Literal

# `AI21BaseModel` is provided by the ai21 SDK.

class ToolCall(AI21BaseModel):
    id: str
    function: ToolFunction
    type: Literal["function"] = "function"

class ToolFunction(AI21BaseModel):
    name: str
    arguments: str
```

This pull request incorporates the necessary modifications to integrate
this functionality into the ai21-langchain library.
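
For illustration, a hedged sketch of mapping such a response into a LangChain message (the attribute access follows the models above; this is not the merged integration code):

```
import json
from langchain_core.messages import AIMessage

def to_langchain_ai_message(response_tool_calls) -> AIMessage:
    return AIMessage(
        content="",
        tool_calls=[
            {
                "name": tc.function.name,
                "args": json.loads(tc.function.arguments),  # arguments arrive as a JSON string
                "id": tc.id,
            }
            for tc in response_tool_calls
        ],
    )
```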

---------

Co-authored-by: asafg <asafg@ai21.com>
Co-authored-by: pazshalev <111360591+pazshalev@users.noreply.github.com>
Co-authored-by: Paz Shalev <pazs@ai21.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-26 10:50:30 -07:00
maang-h
a566a15930 Fix MoonshotChat instantiate with alias (#25755)
- **Description:**
   - Fix `MoonshotChat` instantiation with alias
   - Add `MoonshotChat` to `__init__.py`

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-26 17:33:22 +00:00
venkatram-dev
ec99f0d193 milvus: add_db_milvus_connection (#25627)
Thank you for contributing to LangChain!

- [x] **PR title**: "package: description"
- Where "package" is whichever of langchain, community, core,
experimental, etc. is being modified. Use "docs: ..." for purely docs
changes, "templates: ..." for template changes, "infra: ..." for CI
changes.
  - Example: "community: add foobar LLM"
  - "libs: langchain_milvus: add db name to milvus connection check"


- [x] **PR message**: ***Delete this entire checklist*** and replace
with
    - **Description:**  add db name to milvus connection check
    - **Issue:** https://github.com/langchain-ai/langchain/issues/25277



- [x] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-26 17:29:12 +00:00
Ashvin
af3b3a4474 Update endpoint for AzureMLEndpointApiType class. (#25725)
This addresses the issue mentioned in #25702

I have updated the endpoint used in validating the endpoint API type in
the AzureMLBaseEndpoint class from `/v1/completions` to `/completions`
and `/v1/chat/completions` to `/chat/completions`.

Co-authored-by: = <=>
2024-08-26 08:50:02 -04:00
Mohammad Mohtashim
dcf2278a05 [Community]: Added Template Format Parameter in create_chat_prompt for Langchain Prompty (#25739)
- **Description:** Added a `template_format` parameter to
`create_chat_prompt` to allow `.prompty` files to handle variables in
different template formats.
- **Issue:** #25703

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-26 12:48:21 +00:00
Dristy Srivastava
7205057c3e [Community][minor]: Added langchain_version while calling discover API (#24428)
- **Description:** Added langchain version while calling discover API
during both ingestion and retrieval
- **Issue:** NA
- **Dependencies:** NA
- **Tests:** NA
- **Docs** NA

---------

Co-authored-by: dristy.cd <dristy@clouddefense.io>
2024-08-26 08:47:48 -04:00
Dristy Srivastava
fbb4761199 [Community][minor]: Updating source path, and file path for SharePoint loader in PebbloSafeLoader (#25592)
- **Description:** Updating source path and file path in Pebblo safe
loader for SharePoint apps during loading
- **Issue:** NA
- **Dependencies:** NA
- **Tests:** NA
- **Docs** NA

---------

Co-authored-by: dristy.cd <dristy@clouddefense.io>
2024-08-26 08:38:40 -04:00
Rajendra Kadam
745d1c2b8d community[minor]: [Pebblo] Fix URL construction in newer Python versions (#25747)
- **PR message**: **Fix URL construction in newer Python versions**
- **Description:** 
- Update the URL construction logic to use the `.value` attribute for
Routes enum members (see the sketch below).
- This adjustment resolves an issue where the code worked correctly in
Python 3.9 but failed in Python 3.11.
  - Clean up unused routes.
- **Issue:** NA
- **Dependencies:** NA
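
A minimal sketch of the underlying gotcha (the route and host are illustrative):

```
from enum import Enum

class Routes(str, Enum):
    discover = "/v1/discover"  # illustrative route

# Python 3.9:  f"{Routes.discover}" -> "/v1/discover"
# Python 3.11: f"{Routes.discover}" -> "Routes.discover"  (Enum.__format__ changed)
url = "https://example.com" + Routes.discover.value  # stable on both versions
```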
2024-08-26 07:27:30 -04:00
Rajendra Kadam
58a98c7d8a community: [PebbloRetrievalQA] Implemented Async support for prompt APIs (#25748)
- **Description:** PebbloRetrievalQA: Implemented Async support for
prompt APIs (classification and governance)
- **Issue:** NA
- **Dependencies:** NA
2024-08-26 07:27:05 -04:00
Tomaz Bratanic
6703d795c5 Handle Ollama tool raw schema in llmgraphtransformer (#25752) 2024-08-26 07:26:26 -04:00
1202 changed files with 33558 additions and 30156 deletions

View File

@@ -16,6 +16,10 @@ LANGCHAIN_DIRS = [
"libs/experimental",
]
# for 0.3rc, we are ignoring core dependents
# in order to be able to get CI to pass for individual PRs.
IGNORE_CORE_DEPENDENTS = True
# ignored partners are removed from dependents
# but still run if directly edited
IGNORED_PARTNERS = [
@@ -23,9 +27,6 @@ IGNORED_PARTNERS = [
# specifically in huggingface jobs
# https://github.com/langchain-ai/langchain/issues/25558
"huggingface",
# remove ai21 because of breaking changes in sdk version 2.14.0
# that have not been fixed yet
"ai21",
]
@@ -107,7 +108,7 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
{"working-directory": dir_, "python-version": f"3.{v}"}
for v in range(8, 13)
]
min_python = "3.8"
min_python = "3.9"
max_python = "3.12"
# custom logic for specific directories
@@ -187,6 +188,9 @@ if __name__ == "__main__":
# for extended testing
found = False
for dir_ in LANGCHAIN_DIRS:
if dir_ == "libs/core" and IGNORE_CORE_DEPENDENTS:
dirs_to_run["extended-test"].add(dir_)
continue
if file.startswith(dir_):
found = True
if found:
@@ -198,7 +202,6 @@ if __name__ == "__main__":
dirs_to_run["test"].add("libs/partners/mistralai")
dirs_to_run["test"].add("libs/partners/openai")
dirs_to_run["test"].add("libs/partners/anthropic")
dirs_to_run["test"].add("libs/partners/ai21")
dirs_to_run["test"].add("libs/partners/fireworks")
dirs_to_run["test"].add("libs/partners/groq")

View File

@@ -11,7 +11,7 @@ if __name__ == "__main__":
# see if we're releasing an rc
version = toml_data["tool"]["poetry"]["version"]
releasing_rc = "rc" in version
releasing_rc = "rc" in version or "dev" in version
# if not, iterate through dependencies and make sure none allow prereleases
if not releasing_rc:

View File

@@ -1,114 +0,0 @@
name: dependencies
on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
langchain-location:
required: false
type: string
description: "Relative path to the langchain library folder"
python-version:
required: true
type: string
description: "Python version to use"
env:
POETRY_VERSION: "1.7.1"
jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
name: dependency checks ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ inputs.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ inputs.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: pydantic-cross-compat
- name: Install dependencies
shell: bash
run: poetry install
- name: Check imports with base dependencies
shell: bash
run: poetry run make check_imports
- name: Install test dependencies
shell: bash
run: poetry install --with test
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-location }}
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Install the opposite major version of pydantic
# If normal tests use pydantic v1, here we'll use v2, and vice versa.
shell: bash
# airbyte currently doesn't support pydantic v2
if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
run: |
# Determine the major part of pydantic version
REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
if [[ "$REGULAR_VERSION" == "1" ]]; then
PYDANTIC_DEP=">=2.1,<3"
TEST_WITH_VERSION="2"
elif [[ "$REGULAR_VERSION" == "2" ]]; then
PYDANTIC_DEP="<2"
TEST_WITH_VERSION="1"
else
echo "Unexpected pydantic major version '$REGULAR_VERSION', cannot determine which version to use for cross-compatibility test."
exit 1
fi
# Install via `pip` instead of `poetry add` to avoid changing lockfile,
# which would prevent caching from working: the cache would get saved
# to a different key than where it gets loaded from.
poetry run pip install "pydantic${PYDANTIC_DEP}"
# Ensure that the correct pydantic is installed now.
echo "Checking pydantic version... Expecting ${TEST_WITH_VERSION}"
# Determine the major part of pydantic version
CURRENT_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
# Check that the major part of pydantic version is as expected, if not
# raise an error
if [[ "$CURRENT_VERSION" != "$TEST_WITH_VERSION" ]]; then
echo "Error: expected pydantic version ${CURRENT_VERSION} to have been installed, but found: ${TEST_WITH_VERSION}"
exit 1
fi
echo "Found pydantic version ${CURRENT_VERSION}, as expected"
- name: Run pydantic compatibility tests
# airbyte currently doesn't support pydantic v2
if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
shell: bash
run: make test
- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'

View File

@@ -89,19 +89,6 @@ jobs:
python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit
dependencies:
name: cd ${{ matrix.job-configs.working-directory }}
needs: [ build ]
if: ${{ needs.build.outputs.dependencies != '[]' }}
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.dependencies) }}
uses: ./.github/workflows/_dependencies.yml
with:
working-directory: ${{ matrix.job-configs.working-directory }}
python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit
extended-tests:
name: "cd ${{ matrix.job-configs.working-directory }} / make extended_tests #${{ matrix.job-configs.python-version }}"
needs: [ build ]
@@ -149,7 +136,7 @@ jobs:
echo "$STATUS" | grep 'nothing to commit, working tree clean'
ci_success:
name: "CI Success"
-needs: [build, lint, test, compile-integration-tests, dependencies, extended-tests, test-doc-imports]
+needs: [build, lint, test, compile-integration-tests, extended-tests, test-doc-imports]
if: |
always()
runs-on: ubuntu-latest

View File

@@ -17,16 +17,14 @@ jobs:
fail-fast: false
matrix:
python-version:
- "3.8"
- "3.9"
- "3.11"
working-directory:
- "libs/partners/openai"
- "libs/partners/anthropic"
- "libs/partners/ai21"
- "libs/partners/fireworks"
- "libs/partners/groq"
- "libs/partners/mistralai"
- "libs/partners/together"
- "libs/partners/google-vertexai"
- "libs/partners/google-genai"
- "libs/partners/aws"
@@ -90,11 +88,9 @@ jobs:
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}

View File

@@ -4,6 +4,8 @@ Example code for building applications with LangChain, with an emphasis on more
Notebook | Description
:- | :-
[agent_fireworks_ai_langchain_mongodb.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/agent_fireworks_ai_langchain_mongodb.ipynb) | Build an AI Agent With Memory Using MongoDB, LangChain and FireWorksAI.
[mongodb-langchain-cache-memory.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/mongodb-langchain-cache-memory.ipynb) | Build a RAG Application with Semantic Cache Using MongoDB and LangChain.
[LLaMA2_sql_chat.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/LLaMA2_sql_chat.ipynb) | Build a chat application that interacts with a SQL database using an open source llm (llama2), specifically demonstrated on an SQLite database containing rosters.
[Semi_Structured_RAG.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_Structured_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data, including text and tables, using unstructured for parsing, multi-vector retriever for storing, and lcel for implementing chains.
[Semi_structured_and_multi_moda...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using unstructured for parsing, multi-vector retriever for storage and retrieval, and lcel for implementing chains.

File diff suppressed because one or more lines are too long

View File

@@ -38,7 +38,7 @@
"source": [
"Connection is via `cassio` using `auto=True` parameter, and the notebook uses OpenAI. You should create a `.env` file accordingly.\n",
"\n",
"For Casssandra, set:\n",
"For Cassandra, set:\n",
"```bash\n",
"CASSANDRA_CONTACT_POINTS\n",
"CASSANDRA_USERNAME\n",

View File

@@ -73,6 +73,8 @@ append-related:
generate-references:
$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(OUTPUT_NEW_DOCS_DIR)
update-md: generate-files md-sync
build: install-py-deps generate-files copy-infra render md-sync append-related
vercel-build: install-vercel-deps build generate-references

File diff suppressed because it is too large

View File

@@ -5,51 +5,89 @@ This page contains `arXiv` papers referenced in the LangChain Documentation, API
Templates, and Cookbooks.
From the opposite direction, scientists use `LangChain` in research and reference it in the research papers.
Here you find papers that reference:
- [LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header)
- [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header)
- [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
`arXiv` papers with references to:
[LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header) | [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header) | [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
## Summary
| arXiv id / Title | Authors | Published date 🔻 | LangChain Documentation|
|------------------|---------|-------------------|------------------------|
| `2402.03620v1` [Self-Discover: Large Language Models Self-Compose Reasoning Structures](http://arxiv.org/abs/2402.03620v1) | Pei Zhou, Jay Pujara, Xiang Ren, et al. | 2024-02-06 | `Cookbook:` [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
| `2401.18059v1` [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](http://arxiv.org/abs/2401.18059v1) | Parth Sarthi, Salman Abdullah, Aditi Tuli, et al. | 2024-01-31 | `Cookbook:` [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
| `2401.15884v2` [Corrective Retrieval Augmented Generation](http://arxiv.org/abs/2401.15884v2) | Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al. | 2024-01-29 | `Cookbook:` [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
| `2401.04088v1` [Mixtral of Experts](http://arxiv.org/abs/2401.04088v1) | Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al. | 2024-01-08 | `Cookbook:` [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
| `2312.06648v2` [Dense X Retrieval: What Retrieval Granularity Should We Use?](http://arxiv.org/abs/2312.06648v2) | Tong Chen, Hongwei Wang, Sihao Chen, et al. | 2023-12-11 | `Template:` [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
| `2311.09210v1` [Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models](http://arxiv.org/abs/2311.09210v1) | Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al. | 2023-11-15 | `Template:` [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
| `2310.11511v1` [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](http://arxiv.org/abs/2310.11511v1) | Akari Asai, Zeqiu Wu, Yizhong Wang, et al. | 2023-10-17 | `Cookbook:` [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
| `2310.06117v2` [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](http://arxiv.org/abs/2310.06117v2) | Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al. | 2023-10-09 | `Template:` [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting), `Cookbook:` [stepback-qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
| `2307.09288v2` [Llama 2: Open Foundation and Fine-Tuned Chat Models](http://arxiv.org/abs/2307.09288v2) | Hugo Touvron, Louis Martin, Kevin Stone, et al. | 2023-07-18 | `Cookbook:` [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
| `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023-05-23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023-05-15 | `API:` [langchain_experimental.tot](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.tot), `Cookbook:` [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
| `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023-05-06 | `Cookbook:` [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al. | 2023-05-03 | `API:` [langchain...LLMListwiseRerank](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
| `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023-04-17 | `Cookbook:` [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb), [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb)
| `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023-04-07 | `Cookbook:` [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb), [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb)
| `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023-03-31 | `Cookbook:` [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023-03-30 | `API:` [langchain_experimental.autonomous_agents](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
| `2403.14403v2` [Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity](http://arxiv.org/abs/2403.14403v2) | Soyeong Jeong, Jinheon Baek, Sukmin Cho, et al. | 2024&#8209;03&#8209;21 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2402.03620v1` [Self-Discover: Large Language Models Self-Compose Reasoning Structures](http://arxiv.org/abs/2402.03620v1) | Pei Zhou, Jay Pujara, Xiang Ren, et al. | 2024&#8209;02&#8209;06 | `Cookbook:` [Self-Discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
| `2402.03367v2` [RAG-Fusion: a New Take on Retrieval-Augmented Generation](http://arxiv.org/abs/2402.03367v2) | Zackary Rackauckas | 2024&#8209;01&#8209;31 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2401.18059v1` [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](http://arxiv.org/abs/2401.18059v1) | Parth Sarthi, Salman Abdullah, Aditi Tuli, et al. | 2024&#8209;01&#8209;31 | `Cookbook:` [Raptor](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
| `2401.15884v2` [Corrective Retrieval Augmented Generation](http://arxiv.org/abs/2401.15884v2) | Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al. | 2024&#8209;01&#8209;29 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `Cookbook:` [Langgraph Crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
| `2401.08500v1` [Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering](http://arxiv.org/abs/2401.08500v1) | Tal Ridnik, Dedy Kredo, Itamar Friedman | 2024&#8209;01&#8209;16 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2401.04088v1` [Mixtral of Experts](http://arxiv.org/abs/2401.04088v1) | Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al. | 2024&#8209;01&#8209;08 | `Cookbook:` [Together Ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
| `2312.06648v2` [Dense X Retrieval: What Retrieval Granularity Should We Use?](http://arxiv.org/abs/2312.06648v2) | Tong Chen, Hongwei Wang, Sihao Chen, et al. | 2023&#8209;12&#8209;11 | `Template:` [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
| `2311.09210v1` [Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models](http://arxiv.org/abs/2311.09210v1) | Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al. | 2023&#8209;11&#8209;15 | `Template:` [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
| `2310.11511v1` [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](http://arxiv.org/abs/2310.11511v1) | Akari Asai, Zeqiu Wu, Yizhong Wang, et al. | 2023&#8209;10&#8209;17 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `Cookbook:` [Langgraph Self Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
| `2310.06117v2` [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](http://arxiv.org/abs/2310.06117v2) | Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al. | 2023&#8209;10&#8209;09 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `Template:` [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting), `Cookbook:` [Stepback-Qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
| `2307.15337v3` [Skeleton-of-Thought: Prompting LLMs for Efficient Parallel Generation](http://arxiv.org/abs/2307.15337v3) | Xuefei Ning, Zinan Lin, Zixuan Zhou, et al. | 2023&#8209;07&#8209;28 | `Template:` [skeleton-of-thought](https://python.langchain.com/docs/templates/skeleton-of-thought)
| `2307.09288v2` [Llama 2: Open Foundation and Fine-Tuned Chat Models](http://arxiv.org/abs/2307.09288v2) | Hugo Touvron, Louis Martin, Kevin Stone, et al. | 2023&#8209;07&#8209;18 | `Cookbook:` [Semi Structured Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
| `2307.03172v3` [Lost in the Middle: How Language Models Use Long Contexts](http://arxiv.org/abs/2307.03172v3) | Nelson F. Liu, Kevin Lin, John Hewitt, et al. | 2023&#8209;07&#8209;06 | `Docs:` [docs/how_to/long_context_reorder](https://python.langchain.com/v0.2/docs/how_to/long_context_reorder)
| `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023&#8209;05&#8209;23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [Rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023&#8209;05&#8209;15 | `API:` [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot), `Cookbook:` [Tree Of Thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
| `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023&#8209;05&#8209;06 | `Cookbook:` [Plan And Execute Agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al. | 2023&#8209;05&#8209;03 | `Docs:` [docs/how_to/contextual_compression](https://python.langchain.com/v0.2/docs/how_to/contextual_compression), `API:` [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
| `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023&#8209;04&#8209;17 | `Cookbook:` [Semi Structured Multi Modal Rag Llama2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb), [Semi Structured And Multi Modal Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb)
| `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023&#8209;04&#8209;07 | `Cookbook:` [Generative Agents Interactive Simulacra Of Human Behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb), [Multiagent Bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb)
| `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023&#8209;03&#8209;31 | `Cookbook:` [Camel Role Playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023&#8209;03&#8209;30 | `API:` [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [Hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023&#8209;01&#8209;24 | `API:` [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
| `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin, et al. | 2022&#8209;12&#8209;20 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `API:` [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [Hypothetical Document Embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
| `2212.08073v1` [Constitutional AI: Harmlessness from AI Feedback](http://arxiv.org/abs/2212.08073v1) | Yuntao Bai, Saurav Kadavath, Sandipan Kundu, et al. | 2022&#8209;12&#8209;15 | `Docs:` [docs/versions/migrating_chains/constitutional_chain](https://python.langchain.com/v0.2/docs/versions/migrating_chains/constitutional_chain)
| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al. | 2022&#8209;12&#8209;12 | `API:` [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
| `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al. | 2022&#8209;11&#8209;25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022&#8209;11&#8209;18 | `API:` [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [Program Aided Language Model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
| `2210.11934v2` [An Analysis of Fusion Functions for Hybrid Retrieval](http://arxiv.org/abs/2210.11934v2) | Sebastian Bruch, Siyu Gai, Amir Ingber | 2022&#8209;10&#8209;21 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022&#8209;10&#8209;06 | `Docs:` [docs/integrations/tools/ionic_shopping](https://python.langchain.com/v0.2/docs/integrations/tools/ionic_shopping), [docs/integrations/providers/cohere](https://python.langchain.com/v0.2/docs/integrations/providers/cohere), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `API:` [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
| `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al. | 2022&#8209;09&#8209;22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/v0.2/docs/integrations/providers/activeloop_deeplake)
| `2205.13147v4` [Matryoshka Representation Learning](http://arxiv.org/abs/2205.13147v4) | Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al. | 2022&#8209;05&#8209;26 | `Docs:` [docs/integrations/providers/snowflake](https://python.langchain.com/v0.2/docs/integrations/providers/snowflake)
| `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022&#8209;05&#8209;25 | `API:` [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022&#8209;03&#8209;15 | `Docs:` [docs/tutorials/sql_qa](https://python.langchain.com/v0.2/docs/tutorials/sql_qa), `API:` [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022&#8209;02&#8209;01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
| `2112.01488v3` [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](http://arxiv.org/abs/2112.01488v3) | Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al. | 2021&#8209;12&#8209;02 | `Docs:` [docs/integrations/retrievers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/v0.2/docs/integrations/providers/dspy)
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021&#8209;02&#8209;26 | `API:` [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
| `2005.14165v4` [Language Models are Few-Shot Learners](http://arxiv.org/abs/2005.14165v4) | Tom B. Brown, Benjamin Mann, Nick Ryder, et al. | 2020&#8209;05&#8209;28 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2005.11401v4` [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](http://arxiv.org/abs/2005.11401v4) | Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al. | 2020&#8209;05&#8209;22 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019&#8209;09&#8209;11 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
## Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity
- **Authors:** Soyeong Jeong, Jinheon Baek, Sukmin Cho, et al.
- **arXiv id:** [2403.14403v2](http://arxiv.org/abs/2403.14403v2) **Published Date:** 2024-03-21
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Retrieval-Augmented Large Language Models (LLMs), which incorporate the
non-parametric knowledge from external knowledge bases into LLMs, have emerged
as a promising approach to enhancing response accuracy in several tasks, such
as Question-Answering (QA). However, even though there are various approaches
dealing with queries of different complexities, they either handle simple
queries with unnecessary computational overhead or fail to adequately address
complex multi-step queries; yet, not all user requests fall into only one of
the simple or complex categories. In this work, we propose a novel adaptive QA
framework, that can dynamically select the most suitable strategy for
(retrieval-augmented) LLMs from the simplest to the most sophisticated ones
based on the query complexity. Also, this selection process is operationalized
with a classifier, which is a smaller LM trained to predict the complexity
level of incoming queries with automatically collected labels, obtained from
actual predicted outcomes of models and inherent inductive biases in datasets.
This approach offers a balanced strategy, seamlessly adapting between the
iterative and single-step retrieval-augmented LLMs, as well as the no-retrieval
methods, in response to a range of query complexities. We validate our model on
a set of open-domain QA datasets, covering multiple query complexities, and
show that ours enhances the overall efficiency and accuracy of QA systems,
compared to relevant baselines including the adaptive retrieval approaches.
Code is available at: https://github.com/starsuzi/Adaptive-RAG.
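A minimal sketch of the routing idea in plain Python. The classifier and the three answer strategies below are hypothetical stand-ins: the paper trains a small LM as the classifier, while here a trivial heuristic merely marks out the same interface.

```python
def classify_complexity(query: str) -> str:
    """Placeholder for the trained complexity classifier (a small LM in the paper)."""
    hops = query.count("?") + query.lower().count(" and ")
    return "A" if hops <= 1 else ("B" if hops == 2 else "C")

def answer_directly(query: str) -> str:      # no retrieval at all
    return f"LLM-only answer to: {query}"

def single_step_rag(query: str) -> str:      # one retrieve-then-read pass
    return f"Single-step RAG answer to: {query}"

def iterative_rag(query: str) -> str:        # interleaved retrieval and reasoning
    return f"Multi-step RAG answer to: {query}"

def adaptive_qa(query: str) -> str:
    """Route each query to the cheapest strategy its complexity allows."""
    level = classify_complexity(query)
    if level == "A":
        return answer_directly(query)
    if level == "B":
        return single_step_rag(query)
    return iterative_rag(query)
```

The design point is that the router, not the answer strategies, is the trained component; swapping in a real classifier leaves `adaptive_qa` unchanged.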
## Self-Discover: Large Language Models Self-Compose Reasoning Structures
- **Authors:** Pei Zhou, Jay Pujara, Xiang Ren, et al.
- **arXiv id:** [2402.03620v1](http://arxiv.org/abs/2402.03620v1) **Published Date:** 2024-02-06
- **LangChain:**
- **Cookbook:** [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
**Abstract:** ... the self-discovered reasoning structures are universally applicable across
model families: from PaLM 2-L to GPT-4, and from GPT-4 to Llama2, and share
commonalities with human reasoning patterns.
## RAG-Fusion: a New Take on Retrieval-Augmented Generation
- **Authors:** Zackary Rackauckas
- **arXiv id:** [2402.03367v2](http://arxiv.org/abs/2402.03367v2) **Published Date:** 2024-01-31
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Infineon has identified a need for engineers, account managers, and customers
to rapidly obtain product information. This problem is traditionally addressed
with retrieval-augmented generation (RAG) chatbots, but in this study, I
evaluated the use of the newly popularized RAG-Fusion method. RAG-Fusion
combines RAG and reciprocal rank fusion (RRF) by generating multiple queries,
reranking them with reciprocal scores and fusing the documents and scores.
Through manually evaluating answers on accuracy, relevance, and
comprehensiveness, I found that RAG-Fusion was able to provide accurate and
comprehensive answers due to the generated queries contextualizing the original
query from various perspectives. However, some answers strayed off topic when
the generated queries' relevance to the original query is insufficient. This
research marks significant progress in artificial intelligence (AI) and natural
language processing (NLP) applications and demonstrates transformations in a
global and multi-industry context.
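Reciprocal rank fusion itself is only a few lines. Below is a self-contained sketch of the scoring step; the query-generation and answer-synthesis stages around it are omitted.

```python
from collections import defaultdict

def reciprocal_rank_fusion(rankings: list[list[str]], k: int = 60) -> list[str]:
    """Fuse ranked lists: each document scores sum(1 / (k + rank)) over the
    lists that contain it, so items ranked well by several lists rise."""
    scores: dict[str, float] = defaultdict(float)
    for ranking in rankings:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] += 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

# Three rankings, e.g. from three generated variants of one user query:
print(reciprocal_rank_fusion([["d1", "d2"], ["d2", "d3"], ["d2", "d1"]]))
# -> ['d2', 'd1', 'd3']
```

The constant `k = 60` is the value conventionally used since the original RRF work; it damps the influence of top ranks so no single list dominates.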
## RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval
- **Authors:** Parth Sarthi, Salman Abdullah, Aditi Tuli, et al.
- **arXiv id:** [2401.18059v1](http://arxiv.org/abs/2401.18059v1) **Published Date:** 2024-01-31
- **LangChain:**
- **Cookbook:** [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
**Abstract:** ... benchmark by 20% in absolute accuracy.
## Corrective Retrieval Augmented Generation
- **Authors:** Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al.
- **arXiv id:** [2401.15884v2](http://arxiv.org/abs/2401.15884v2) **Published Date:** 2024-01-29
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **Cookbook:** [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
**Abstract:** Large language models (LLMs) inevitably exhibit hallucinations since the
... RAG-based approaches. Experiments on four datasets covering short- and
long-form generation tasks show that CRAG can significantly improve the
performance of RAG-based approaches.
## Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering
- **Authors:** Tal Ridnik, Dedy Kredo, Itamar Friedman
- **arXiv id:** [2401.08500v1](http://arxiv.org/abs/2401.08500v1) **Published Date:** 2024-01-16
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Code generation problems differ from common natural language problems - they
require matching the exact syntax of the target language, identifying happy
paths and edge cases, paying attention to numerous small details in the problem
spec, and addressing other code-specific issues and requirements. Hence, many
of the optimizations and tricks that have been successful in natural language
generation may not be effective for code tasks. In this work, we propose a new
approach to code generation by LLMs, which we call AlphaCodium - a test-based,
multi-stage, code-oriented iterative flow, that improves the performances of
LLMs on code problems. We tested AlphaCodium on a challenging code generation
dataset called CodeContests, which includes competitive programming problems
from platforms such as Codeforces. The proposed flow consistently and
significantly improves results. On the validation set, for example, GPT-4
accuracy (pass@5) increased from 19% with a single well-designed direct prompt
to 44% with the AlphaCodium flow. Many of the principles and best practices
acquired in this work, we believe, are broadly applicable to general code
generation tasks. Full implementation is available at:
https://github.com/Codium-ai/AlphaCodium
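A hedged sketch of the test-driven repair loop at the heart of such a flow. `llm(prompt) -> str` is a hypothetical completion function, and `run_candidate` is a toy stand-in for a sandboxed test harness; the actual AlphaCodium flow has more stages (problem reflection, AI-generated tests, candidate ranking).

```python
def run_candidate(code: str, inp: str) -> str:
    """Toy test harness: exec the candidate and call its solve() on one input.
    (A real harness would isolate the process and enforce time limits.)"""
    try:
        ns: dict = {}
        exec(code, ns)
        return str(ns["solve"](inp))
    except Exception as exc:
        return f"error: {exc}"

def solve_with_tests(problem: str, tests: list[tuple[str, str]], llm,
                     max_rounds: int = 3) -> str:
    """Generate a solution, run it against public tests, ask the model to repair."""
    code = llm(f"Write a Python function solve(inp) for:\n{problem}")
    for _ in range(max_rounds):
        failures = [(inp, want) for inp, want in tests
                    if run_candidate(code, inp) != want]
        if not failures:          # all public tests pass: stop iterating
            return code
        code = llm("The code below fails these (input, expected) tests: "
                   f"{failures}\n\n{code}\n\nReturn a corrected version.")
    return code
```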
## Mixtral of Experts
- **Authors:** Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al.
- **arXiv id:** [2401.04088v1](http://arxiv.org/abs/2401.04088v1) **Published Date:** 2024-01-08
- **LangChain:**
- **Cookbook:** [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
**Abstract:** ... the base and instruct models are released under the Apache 2.0 license.
## Dense X Retrieval: What Retrieval Granularity Should We Use?
- **Authors:** Tong Chen, Hongwei Wang, Sihao Chen, et al.
- **arXiv id:** [2312.06648v2](http://arxiv.org/abs/2312.06648v2) **Published Date:** 2023-12-11
- **LangChain:**
- **Template:** [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
**Abstract:** ... information.
## Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models
- **Authors:** Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al.
- **arXiv id:** [2311.09210v1](http://arxiv.org/abs/2311.09210v1) **Published Date:** 2023-11-15
- **LangChain:**
- **Template:** [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
**Abstract:** ... outside the pre-training knowledge scope.
## Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection
- **Authors:** Akari Asai, Zeqiu Wu, Yizhong Wang, et al.
- **arXiv id:** [2310.11511v1](http://arxiv.org/abs/2310.11511v1) **Published Date:** 2023-10-17
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **Cookbook:** [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
**Abstract:** Despite their remarkable capabilities, large language models (LLMs) often
... to these models.
## Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models
- **Authors:** Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al.
- **arXiv id:** [2310.06117v2](http://arxiv.org/abs/2310.06117v2) **Published Date:** 2023-10-09
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **Template:** [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting)
- **Cookbook:** [stepback-qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
**Abstract:** ... including STEM, Knowledge QA, and Multi-Hop Reasoning. For instance, Step-Back
Prompting improves PaLM-2L performance on MMLU (Physics and Chemistry) by 7%
and 11% respectively, TimeQA by 27%, and MuSiQue by 7%.
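The stepback-qa-prompting template linked above implements this pattern with few-shot prompts; a stripped-down sketch of the same two-stage structure, with a hypothetical `llm(prompt) -> str`, looks like this:

```python
def step_back_answer(question: str, llm) -> str:
    """Two-stage prompting: abstract first, then answer with the abstraction."""
    # 1. Step back: turn the specific question into one about the principle.
    generic = llm("Rewrite this as a more general question about the "
                  f"underlying principle:\n{question}")
    # 2. Answer the generic question, then condition the final answer on it.
    background = llm(generic)
    return llm(f"Background:\n{background}\n\nNow answer: {question}")
```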
## Skeleton-of-Thought: Prompting LLMs for Efficient Parallel Generation
- **Authors:** Xuefei Ning, Zinan Lin, Zixuan Zhou, et al.
- **arXiv id:** [2307.15337v3](http://arxiv.org/abs/2307.15337v3) **Published Date:** 2023-07-28
- **LangChain:**
- **Template:** [skeleton-of-thought](https://python.langchain.com/docs/templates/skeleton-of-thought)
**Abstract:** This work aims at decreasing the end-to-end generation latency of large
language models (LLMs). One of the major causes of the high generation latency
is the sequential decoding approach adopted by almost all state-of-the-art
LLMs. In this work, motivated by the thinking and writing process of humans, we
propose Skeleton-of-Thought (SoT), which first guides LLMs to generate the
skeleton of the answer, and then conducts parallel API calls or batched
decoding to complete the contents of each skeleton point in parallel. Not only
does SoT provide considerable speed-ups across 12 LLMs, but it can also
potentially improve the answer quality on several question categories. SoT is
an initial attempt at data-centric optimization for inference efficiency, and
showcases the potential of eliciting high-quality answers by explicitly
planning the answer structure in language.
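A compact sketch of the skeleton-then-expand decoding pattern, assuming a hypothetical, thread-safe `llm(prompt) -> str`:

```python
from concurrent.futures import ThreadPoolExecutor

def skeleton_of_thought(question: str, llm) -> str:
    """Draft a short skeleton, then expand each point in parallel."""
    skeleton = llm(f"List 3-5 short bullet points answering: {question}")
    points = [p.lstrip("-• ").strip() for p in skeleton.splitlines() if p.strip()]
    with ThreadPoolExecutor() as pool:   # parallel calls hide per-point latency
        bodies = list(pool.map(
            lambda p: llm(f"Expand this point in 2-3 sentences: {p}"), points))
    return "\n\n".join(bodies)
```

Because the expansions are independent, wall-clock latency approaches that of the longest single point rather than the sum of all points.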
## Llama 2: Open Foundation and Fine-Tuned Chat Models
- **Authors:** Hugo Touvron, Louis Martin, Kevin Stone, et al.
- **arXiv id:** [2307.09288v2](http://arxiv.org/abs/2307.09288v2) **Published Date:** 2023-07-18
- **LangChain:**
- **Cookbook:** [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
**Abstract:** ... detailed description of our approach to fine-tuning and safety improvements of
Llama 2-Chat in order to enable the community to build on our work and
contribute to the responsible development of LLMs.
## Lost in the Middle: How Language Models Use Long Contexts
- **Authors:** Nelson F. Liu, Kevin Lin, John Hewitt, et al.
- **arXiv id:** [2307.03172v3](http://arxiv.org/abs/2307.03172v3) **Published Date:** 2023-07-06
- **LangChain:**
- **Documentation:** [docs/how_to/long_context_reorder](https://python.langchain.com/v0.2/docs/how_to/long_context_reorder)
**Abstract:** While recent language models have the ability to take long contexts as input,
relatively little is known about how well they use longer context. We analyze
the performance of language models on two tasks that require identifying
relevant information in their input contexts: multi-document question answering
and key-value retrieval. We find that performance can degrade significantly
when changing the position of relevant information, indicating that current
language models do not robustly make use of information in long input contexts.
In particular, we observe that performance is often highest when relevant
information occurs at the beginning or end of the input context, and
significantly degrades when models must access relevant information in the
middle of long contexts, even for explicitly long-context models. Our analysis
provides a better understanding of how language models use their input context
and provides new evaluation protocols for future long-context language models.
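LangChain's long-context-reorder transformer (linked above) applies this finding directly; a hand-rolled equivalent of the reordering is only a few lines:

```python
def reorder_for_long_context(docs: list[str]) -> list[str]:
    """Counter the 'lost in the middle' effect: keep the strongest hits at the
    edges of the prompt and bury the weakest in the middle.
    `docs` is assumed sorted most-relevant-first, as a retriever returns them."""
    front: list[str] = []
    back: list[str] = []
    for i, doc in enumerate(docs):
        (front if i % 2 == 0 else back).append(doc)
    return front + back[::-1]

print(reorder_for_long_context(["r1", "r2", "r3", "r4", "r5"]))
# -> ['r1', 'r3', 'r5', 'r4', 'r2']  (the top two hits sit at the two ends)
```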
## Query Rewriting for Retrieval-Augmented Large Language Models
- **Authors:** Xinbei Ma, Yeyun Gong, Pengcheng He, et al.
- **arXiv id:** [2305.14283v3](http://arxiv.org/abs/2305.14283v3) **Published Date:** 2023-05-23
- **LangChain:**
- **Template:** [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read)
**Abstract:** ... for retrieval-augmented LLM.
## Large Language Model Guided Tree-of-Thought
- **Authors:** Jieyi Long
- **arXiv id:** [2305.08291v1](http://arxiv.org/abs/2305.08291v1) **Published Date:** 2023-05-15
- **LangChain:**
- **API Reference:** [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot)
- **Cookbook:** [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
**Abstract:** In this paper, we introduce the Tree-of-Thought (ToT) framework, a novel
... implementation of the ToT-based Sudoku solver is available on GitHub:
## Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models
- **Authors:** Lei Wang, Wanyu Xu, Yihuai Lan, et al.
- **arXiv id:** [2305.04091v3](http://arxiv.org/abs/2305.04091v3) **Published Date:** 2023-05-06
- **LangChain:**
- **Cookbook:** [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
**Abstract:** ... https://github.com/AGI-Edgerunners/Plan-and-Solve-Prompting.
## Zero-Shot Listwise Document Reranking with a Large Language Model
- **Authors:** Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al.
- **arXiv id:** [2305.02156v1](http://arxiv.org/abs/2305.02156v1) **Published Date:** 2023-05-03
- **LangChain:**
- **Documentation:** [docs/how_to/contextual_compression](https://python.langchain.com/v0.2/docs/how_to/contextual_compression)
- **API Reference:** [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
**Abstract:** Supervised ranking methods based on bi-encoder or cross-encoder architectures
have shown success in multi-stage text ranking tasks, but they require large
... with results showing its potential to generalize across different languages.
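In sketch form, zero-shot listwise reranking is a single prompt over the whole candidate list. `llm(prompt) -> str` is a hypothetical completion function; `LLMListwiseRerank` (linked above) wraps the same idea behind LangChain's document-compressor interface.

```python
def listwise_rerank(query: str, docs: list[str], llm) -> list[str]:
    """Ask the LLM for a relevance ordering of the whole candidate list."""
    numbered = "\n".join(f"[{i}] {d}" for i, d in enumerate(docs))
    reply = llm(f"Query: {query}\n{numbered}\n"
                "List the bracketed indices from most to least relevant, "
                "comma-separated.")
    order = [int(t) for t in (tok.strip(" []") for tok in reply.split(","))
             if t.isdigit()]
    return [docs[i] for i in order if i < len(docs)]
```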
## Visual Instruction Tuning
- **Authors:** Haotian Liu, Chunyuan Li, Qingyang Wu, et al.
- **arXiv id:** [2304.08485v2](http://arxiv.org/abs/2304.08485v2) **Published Date:** 2023-04-17
- **LangChain:**
- **Cookbook:** [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb), [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb)
**Abstract:** Instruction tuning large language models (LLMs) using machine-generated
instruction-following data has improved zero-shot capabilities on new tasks,
... publicly available.
## Generative Agents: Interactive Simulacra of Human Behavior
- **Authors:** Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al.
- **arXiv id:** [2304.03442v2](http://arxiv.org/abs/2304.03442v2) **Published Date:** 2023-04-07
- **LangChain:**
- **Cookbook:** [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb), [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb)
**Abstract:** Believable proxies of human behavior can empower interactive applications
ranging from immersive environments to rehearsal spaces for interpersonal
... interaction patterns for enabling believable simulations of human behavior.
## CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society
- **Authors:** Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al.
- **arXiv id:** [2303.17760v2](http://arxiv.org/abs/2303.17760v2) **Published Date:** 2023-03-31
- **LangChain:**
- **Cookbook:** [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
**Abstract:** ... agents and beyond: https://github.com/camel-ai/camel.
## HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face
- **Authors:** Yongliang Shen, Kaitao Song, Xu Tan, et al.
- **arXiv id:** [2303.17580v4](http://arxiv.org/abs/2303.17580v4) **Published Date:** 2023-03-30
- **LangChain:**
- **API Reference:** [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents)
- **Cookbook:** [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
**Abstract:** Solving complicated AI tasks with different domains and modalities is a key
... realization of artificial general intelligence.
## A Watermark for Large Language Models
- **Authors:** John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al.
- **arXiv id:** [2301.10226v4](http://arxiv.org/abs/2301.10226v4) **Published Date:** 2023-01-24
- **LangChain:**
- **API Reference:** [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
**Abstract:** Potential harms of large language models can be mitigated by watermarking
model output, i.e., embedding signals into generated text that are invisible to
... family, and discuss robustness and security.
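A toy sketch of the green-list idea behind such watermarks (not the paper's code): the previous token seeds a PRNG that marks a fraction `GAMMA` of the vocabulary "green", and a bias `DELTA` is added to green-token logits before sampling. A detector that knows the seeding scheme can re-derive the lists and count green hits; `GAMMA` and `DELTA` here are assumed illustrative values.

```python
import hashlib
import random

GAMMA, DELTA = 0.5, 2.0   # green-list fraction and logit bias (assumed values)

def green_list(prev_token: int, vocab_size: int) -> set[int]:
    """Derive the pseudo-random green list from the previous token."""
    seed = int(hashlib.sha256(str(prev_token).encode()).hexdigest(), 16) % 2**32
    return set(random.Random(seed).sample(range(vocab_size),
                                          int(GAMMA * vocab_size)))

def bias_logits(logits: list[float], prev_token: int) -> list[float]:
    """Soft watermark: nudge generation toward green tokens before sampling."""
    greens = green_list(prev_token, len(logits))
    return [x + DELTA if i in greens else x for i, x in enumerate(logits)]
```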
## Precise Zero-Shot Dense Retrieval without Relevance Labels
- **Authors:** Luyu Gao, Xueguang Ma, Jimmy Lin, et al.
- **arXiv id:** [2212.10496v1](http://arxiv.org/abs/2212.10496v1) **Published Date:** 2022-12-20
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **API Reference:** [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder)
- **Template:** [hyde](https://python.langchain.com/docs/templates/hyde)
- **Cookbook:** [hypothetical_document_embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
**Abstract:** ... state-of-the-art unsupervised dense retriever Contriever and shows strong
performance comparable to fine-tuned retrievers, across various tasks (e.g. web
search, QA, fact verification) and languages (e.g. sw, ko, ja).
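The mechanism behind `HypotheticalDocumentEmbedder` fits in a few lines: embed a generated (possibly wrong) answer instead of the query, and let the embedding space surface real documents near it. `llm`, `embed`, and `index` below are hypothetical collaborators, not a specific LangChain API.

```python
def hyde_search(query: str, llm, embed, index, k: int = 5):
    """Search with the embedding of a generated answer instead of the query."""
    hypothetical = llm(f"Write a short passage that answers: {query}")
    return index.search(embed(hypothetical), k=k)  # real docs near the fake one
```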
## Constitutional AI: Harmlessness from AI Feedback
- **Authors:** Yuntao Bai, Saurav Kadavath, Sandipan Kundu, et al.
- **arXiv id:** [2212.08073v1](http://arxiv.org/abs/2212.08073v1) **Published Date:** 2022-12-15
- **LangChain:**
- **Documentation:** [docs/versions/migrating_chains/constitutional_chain](https://python.langchain.com/v0.2/docs/versions/migrating_chains/constitutional_chain)
**Abstract:** As AI systems become more capable, we would like to enlist their help to
supervise other AIs. We experiment with methods for training a harmless AI
assistant through self-improvement, without any human labels identifying
harmful outputs. The only human oversight is provided through a list of rules
or principles, and so we refer to the method as 'Constitutional AI'. The
process involves both a supervised learning and a reinforcement learning phase.
In the supervised phase we sample from an initial model, then generate
self-critiques and revisions, and then finetune the original model on revised
responses. In the RL phase, we sample from the finetuned model, use a model to
evaluate which of the two samples is better, and then train a preference model
from this dataset of AI preferences. We then train with RL using the preference
model as the reward signal, i.e. we use 'RL from AI Feedback' (RLAIF). As a
result we are able to train a harmless but non-evasive AI assistant that
engages with harmful queries by explaining its objections to them. Both the SL
and RL methods can leverage chain-of-thought style reasoning to improve the
human-judged performance and transparency of AI decision making. These methods
make it possible to control AI behavior more precisely and with far fewer human
labels.
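A sketch of the supervised (self-revision) phase only, with a hypothetical `llm(prompt) -> str` and two illustrative principles; the RLAIF phase is omitted.

```python
PRINCIPLES = [
    "Identify ways the response is harmful, unethical, or misleading.",
    "Identify ways the response fails to be helpful while staying harmless.",
]

def constitutional_revision(prompt: str, llm) -> str:
    """One critique/revision round per principle, as in the supervised phase."""
    response = llm(prompt)
    for principle in PRINCIPLES:
        critique = llm(f"Response: {response}\nCritique request: {principle}")
        response = llm(f"Response: {response}\nCritique: {critique}\n"
                       "Rewrite the response to address the critique.")
    return response   # revised answers become fine-tuning data in the paper
```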
## Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments
- **Authors:** Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al.
- **arXiv id:** [2212.07425v3](http://arxiv.org/abs/2212.07425v3) **Published Date:** 2022-12-12
- **LangChain:**
- **API Reference:** [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
**Abstract:** The spread of misinformation, propaganda, and flawed argumentation has been
amplified in the Internet era. Given the volume of data and the subtlety of
... further work on logical fallacy identification.
## Complementary Explanations for Effective In-Context Learning
- **Authors:** Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al.
- **arXiv id:** [2211.13892v2](http://arxiv.org/abs/2211.13892v2) **Published Date:** 2022-11-25
- **LangChain:**
- **API Reference:** [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
**Abstract:** Large language models (LLMs) have exhibited remarkable capabilities in
learning from explanations in prompts, but there has been limited understanding
... performance across three real-world tasks on multiple LLMs.
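The selector linked above picks few-shot exemplars by maximal marginal relevance, trading similarity to the query against redundancy with examples already chosen. A plain-Python version of the criterion, over precomputed similarity scores:

```python
def mmr_select(query_sim: list[float], pair_sim: list[list[float]],
               k: int, lam: float = 0.5) -> list[int]:
    """Greedy MMR: score = lam * relevance - (1 - lam) * max similarity
    to anything already selected; returns chosen indices in pick order."""
    k = min(k, len(query_sim))
    chosen: list[int] = []
    while len(chosen) < k:
        best, best_score = -1, float("-inf")
        for i in range(len(query_sim)):
            if i in chosen:
                continue
            redundancy = max((pair_sim[i][j] for j in chosen), default=0.0)
            score = lam * query_sim[i] - (1 - lam) * redundancy
            if score > best_score:
                best, best_score = i, score
        chosen.append(best)
    return chosen
```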
## PAL: Program-aided Language Models
- **Authors:** Luyu Gao, Aman Madaan, Shuyan Zhou, et al.
- **arXiv id:** [2211.10435v2](http://arxiv.org/abs/2211.10435v2) **Published Date:** 2022-11-18
- **LangChain:**
- **API Reference:** [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
- **Cookbook:** [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
**Abstract:** Large language models (LLMs) have recently demonstrated an impressive ability
... accuracy on the GSM8K benchmark of math word problems, surpassing PaLM-540B
which uses chain-of-thought by absolute 15% top-1. Our code and data are
publicly available at http://reasonwithpal.com/ .
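PAL in miniature: the model writes a program and the interpreter, not the LLM, performs the computation. `llm(prompt) -> str` is a hypothetical completion function, and exec'ing model output is shown only to illustrate the offloading idea; `PALChain` adds guards around this step.

```python
def pal_answer(question: str, llm) -> object:
    """Have the model emit Python; run it and read back the result."""
    code = llm("Write Python that computes the answer to the question and "
               f"stores it in a variable named `result`.\nQuestion: {question}")
    ns: dict = {}
    exec(code, ns)   # the interpreter, not the LLM, does the arithmetic
    return ns["result"]
```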
## An Analysis of Fusion Functions for Hybrid Retrieval
- **Authors:** Sebastian Bruch, Siyu Gai, Amir Ingber
- **arXiv id:** [2210.11934v2](http://arxiv.org/abs/2210.11934v2) **Published Date:** 2022-10-21
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** We study hybrid search in text retrieval where lexical and semantic search
are fused together with the intuition that the two are complementary in how
they model relevance. In particular, we examine fusion by a convex combination
(CC) of lexical and semantic scores, as well as the Reciprocal Rank Fusion
(RRF) method, and identify their advantages and potential pitfalls. Contrary to
existing studies, we find RRF to be sensitive to its parameters; that the
learning of a CC fusion is generally agnostic to the choice of score
normalization; that CC outperforms RRF in in-domain and out-of-domain settings;
and finally, that CC is sample efficient, requiring only a small set of
training examples to tune its only parameter to a target domain.
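Both fusion functions are easy to state in code. RRF appears in the RAG-Fusion sketch earlier on this page; a convex combination over min-max-normalized scores, with its single tunable parameter `alpha`, looks like this:

```python
def convex_combination(lex: dict[str, float], sem: dict[str, float],
                       alpha: float = 0.5) -> dict[str, float]:
    """Fuse lexical and semantic scores as alpha*sem + (1-alpha)*lex after
    min-max normalization (the paper finds the learned fusion largely
    agnostic to the normalization choice)."""
    def norm(scores: dict[str, float]) -> dict[str, float]:
        lo, hi = min(scores.values()), max(scores.values())
        return {d: (s - lo) / ((hi - lo) or 1.0) for d, s in scores.items()}
    nl, ns = norm(lex), norm(sem)
    return {d: alpha * ns.get(d, 0.0) + (1 - alpha) * nl.get(d, 0.0)
            for d in set(nl) | set(ns)}
```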
## ReAct: Synergizing Reasoning and Acting in Language Models
- **Authors:** Shunyu Yao, Jeffrey Zhao, Dian Yu, et al.
- **arXiv id:** [2210.03629v3](http://arxiv.org/abs/2210.03629v3) **Published Date:** 2022-10-06
- **LangChain:**
- **Documentation:** [docs/integrations/tools/ionic_shopping](https://python.langchain.com/v0.2/docs/integrations/tools/ionic_shopping), [docs/integrations/providers/cohere](https://python.langchain.com/v0.2/docs/integrations/providers/cohere), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **API Reference:** [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
**Abstract:** While large language models (LLMs) have demonstrated impressive capabilities
across tasks in language understanding and interactive decision making, their
@@ -680,12 +839,11 @@ Project site with code: https://react-lm.github.io
## Deep Lake: a Lakehouse for Deep Learning
- **arXiv id:** [2209.10785v2](http://arxiv.org/abs/2209.10785v2) **Published Date:** 2022-09-22
- **Title:** Deep Lake: a Lakehouse for Deep Learning
- **Authors:** Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al.
- **arXiv id:** [2209.10785v2](http://arxiv.org/abs/2209.10785v2) **Published Date:** 2022-09-22
- **LangChain:**
- **Documentation:** [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
- **Documentation:** [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/v0.2/docs/integrations/providers/activeloop_deeplake)
**Abstract:** Traditional data lakes provide critical data infrastructure for analytical
workloads by enabling time travel, running SQL queries, ingesting data with
@@ -706,12 +864,11 @@ TensorFlow, JAX, and integrate with numerous MLOps tools.
## Matryoshka Representation Learning
- **arXiv id:** [2205.13147v4](http://arxiv.org/abs/2205.13147v4) **Published Date:** 2022-05-26
- **Title:** Matryoshka Representation Learning
- **Authors:** Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al.
- **arXiv id:** [2205.13147v4](http://arxiv.org/abs/2205.13147v4) **Published Date:** 2022-05-26
- **LangChain:**
- **Documentation:** [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
- **Documentation:** [docs/integrations/providers/snowflake](https://python.langchain.com/v0.2/docs/integrations/providers/snowflake)
**Abstract:** Learned representations are a central component in modern ML systems, serving
a multitude of downstream tasks. When training such representations, it is
@@ -738,12 +895,11 @@ are open-sourced at https://github.com/RAIVNLab/MRL.
## Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
- **arXiv id:** [2205.12654v1](http://arxiv.org/abs/2205.12654v1) **Published Date:** 2022-05-25
- **Title:** Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
- **Authors:** Kevin Heffernan, Onur Çelebi, Holger Schwenk
- **arXiv id:** [2205.12654v1](http://arxiv.org/abs/2205.12654v1) **Published Date:** 2022-05-25
- **LangChain:**
- **API Reference:** [langchain_community...LaserEmbeddings](https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
- **API Reference:** [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
**Abstract:** Scaling multilingual representation learning beyond the hundred most frequent
languages is challenging, in particular to cover the long tail of low-resource
@@ -765,12 +921,12 @@ encoders, mine bitexts, and validate the bitexts by training NMT systems.
## Evaluating the Text-to-SQL Capabilities of Large Language Models
- **arXiv id:** [2204.00498v1](http://arxiv.org/abs/2204.00498v1) **Published Date:** 2022-03-15
- **Title:** Evaluating the Text-to-SQL Capabilities of Large Language Models
- **Authors:** Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau
- **arXiv id:** [2204.00498v1](http://arxiv.org/abs/2204.00498v1) **Published Date:** 2022-03-15
- **LangChain:**
- **API Reference:** [langchain_community...SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
- **Documentation:** [docs/tutorials/sql_qa](https://python.langchain.com/v0.2/docs/tutorials/sql_qa)
- **API Reference:** [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
**Abstract:** We perform an empirical evaluation of Text-to-SQL capabilities of the Codex
language model. We find that, without any finetuning, Codex is a strong
@@ -782,12 +938,11 @@ few-shot examples.
## Locally Typical Sampling
- **arXiv id:** [2202.00666v5](http://arxiv.org/abs/2202.00666v5) **Published Date:** 2022-02-01
- **Title:** Locally Typical Sampling
- **Authors:** Clara Meister, Tiago Pimentel, Gian Wiher, et al.
- **arXiv id:** [2202.00666v5](http://arxiv.org/abs/2202.00666v5) **Published Date:** 2022-02-01
- **LangChain:**
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
**Abstract:** Today's probabilistic language generators fall short when it comes to
producing coherent and fluent text despite the fact that the underlying models
@@ -810,14 +965,35 @@ locally typical sampling offers competitive performance (in both abstractive
summarization and story generation) in terms of quality while consistently
reducing degenerate repetitions.
## Learning Transferable Visual Models From Natural Language Supervision
## ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction
- **arXiv id:** [2103.00020v1](http://arxiv.org/abs/2103.00020v1) **Published Date:** 2021-02-26
- **Title:** Learning Transferable Visual Models From Natural Language Supervision
- **Authors:** Alec Radford, Jong Wook Kim, Chris Hallacy, et al.
- **Authors:** Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al.
- **arXiv id:** [2112.01488v3](http://arxiv.org/abs/2112.01488v3) **Published Date:** 2021-12-02
- **LangChain:**
- **API Reference:** [langchain_experimental.open_clip](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.open_clip)
- **Documentation:** [docs/integrations/retrievers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/v0.2/docs/integrations/providers/dspy)
**Abstract:** Neural information retrieval (IR) has greatly advanced search and other
knowledge-intensive language tasks. While many neural IR methods encode queries
and documents into single-vector representations, late interaction models
produce multi-vector representations at the granularity of each token and
decompose relevance modeling into scalable token-level computations. This
decomposition has been shown to make late interaction more effective, but it
inflates the space footprint of these models by an order of magnitude. In this
work, we introduce ColBERTv2, a retriever that couples an aggressive residual
compression mechanism with a denoised supervision strategy to simultaneously
improve the quality and space footprint of late interaction. We evaluate
ColBERTv2 across a wide range of benchmarks, establishing state-of-the-art
quality within and outside the training domain while reducing the space
footprint of late interaction models by 6--10$\times$.
## Learning Transferable Visual Models From Natural Language Supervision
- **Authors:** Alec Radford, Jong Wook Kim, Chris Hallacy, et al.
- **arXiv id:** [2103.00020v1](http://arxiv.org/abs/2103.00020v1) **Published Date:** 2021-02-26
- **LangChain:**
- **API Reference:** [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
**Abstract:** State-of-the-art computer vision systems are trained to predict a fixed set
of predetermined object categories. This restricted form of supervision limits
@@ -840,14 +1016,77 @@ zero-shot without needing to use any of the 1.28 million training examples it
was trained on. We release our code and pre-trained model weights at
https://github.com/OpenAI/CLIP.
## CTRL: A Conditional Transformer Language Model for Controllable Generation
## Language Models are Few-Shot Learners
- **arXiv id:** [1909.05858v2](http://arxiv.org/abs/1909.05858v2) **Published Date:** 2019-09-11
- **Title:** CTRL: A Conditional Transformer Language Model for Controllable Generation
- **Authors:** Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al.
- **Authors:** Tom B. Brown, Benjamin Mann, Nick Ryder, et al.
- **arXiv id:** [2005.14165v4](http://arxiv.org/abs/2005.14165v4) **Published Date:** 2020-05-28
- **LangChain:**
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/langchain_community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Recent work has demonstrated substantial gains on many NLP tasks and
benchmarks by pre-training on a large corpus of text followed by fine-tuning on
a specific task. While typically task-agnostic in architecture, this method
still requires task-specific fine-tuning datasets of thousands or tens of
thousands of examples. By contrast, humans can generally perform a new language
task from only a few examples or from simple instructions - something which
current NLP systems still largely struggle to do. Here we show that scaling up
language models greatly improves task-agnostic, few-shot performance, sometimes
even reaching competitiveness with prior state-of-the-art fine-tuning
approaches. Specifically, we train GPT-3, an autoregressive language model with
175 billion parameters, 10x more than any previous non-sparse language model,
and test its performance in the few-shot setting. For all tasks, GPT-3 is
applied without any gradient updates or fine-tuning, with tasks and few-shot
demonstrations specified purely via text interaction with the model. GPT-3
achieves strong performance on many NLP datasets, including translation,
question-answering, and cloze tasks, as well as several tasks that require
on-the-fly reasoning or domain adaptation, such as unscrambling words, using a
novel word in a sentence, or performing 3-digit arithmetic. At the same time,
we also identify some datasets where GPT-3's few-shot learning still struggles,
as well as some datasets where GPT-3 faces methodological issues related to
training on large web corpora. Finally, we find that GPT-3 can generate samples
of news articles which human evaluators have difficulty distinguishing from
articles written by humans. We discuss broader societal impacts of this finding
and of GPT-3 in general.
## Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
- **Authors:** Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al.
- **arXiv id:** [2005.11401v4](http://arxiv.org/abs/2005.11401v4) **Published Date:** 2020-05-22
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Large pre-trained language models have been shown to store factual knowledge
in their parameters, and achieve state-of-the-art results when fine-tuned on
downstream NLP tasks. However, their ability to access and precisely manipulate
knowledge is still limited, and hence on knowledge-intensive tasks, their
performance lags behind task-specific architectures. Additionally, providing
provenance for their decisions and updating their world knowledge remain open
research problems. Pre-trained models with a differentiable access mechanism to
explicit non-parametric memory can overcome this issue, but have so far been
only investigated for extractive downstream tasks. We explore a general-purpose
fine-tuning recipe for retrieval-augmented generation (RAG) -- models which
combine pre-trained parametric and non-parametric memory for language
generation. We introduce RAG models where the parametric memory is a
pre-trained seq2seq model and the non-parametric memory is a dense vector index
of Wikipedia, accessed with a pre-trained neural retriever. We compare two RAG
formulations, one which conditions on the same retrieved passages across the
whole generated sequence, the other can use different passages per token. We
fine-tune and evaluate our models on a wide range of knowledge-intensive NLP
tasks and set the state-of-the-art on three open domain QA tasks, outperforming
parametric seq2seq models and task-specific retrieve-and-extract architectures.
For language generation tasks, we find that RAG models generate more specific,
diverse and factual language than a state-of-the-art parametric-only seq2seq
baseline.
## CTRL: A Conditional Transformer Language Model for Controllable Generation
- **Authors:** Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al.
- **arXiv id:** [1909.05858v2](http://arxiv.org/abs/1909.05858v2) **Published Date:** 2019-09-11
- **LangChain:**
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
**Abstract:** Large-scale language models show promising text generation capabilities, but
users cannot easily control particular aspects of the generated text. We

View File

@@ -15,11 +15,6 @@ The interfaces for core components like LLMs, vector stores, retrievers and more
No third party integrations are defined here.
The dependencies are kept purposefully very lightweight.
### Partner packages
While the long tail of integrations are in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).
This was done in order to improve support for these important integrations.
### `langchain`
The main `langchain` package contains chains, agents, and retrieval strategies that make up an application's cognitive architecture.
@@ -33,6 +28,11 @@ Key partner packages are separated out (see below).
This contains all integrations for various components (LLMs, vector stores, retrievers).
All dependencies in this package are optional to keep the package as lightweight as possible.
### Partner packages
While the long tail of integrations is in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).
This was done in order to improve support for these important integrations.
### [`langgraph`](https://langchain-ai.github.io/langgraph)
`langgraph` is an extension of `langchain` aimed at
@@ -61,28 +61,28 @@ A developer platform that lets you debug, test, evaluate, and monitor LLM applic
## LangChain Expression Language (LCEL)
<span data-heading-keywords="lcel"></span>
LangChain Expression Language, or LCEL, is a declarative way to chain LangChain components.
`LangChain Expression Language`, or `LCEL`, is a declarative way to chain LangChain components.
LCEL was designed from day 1 to **support putting prototypes in production, with no code changes**, from the simplest “prompt + LLM” chain to the most complex chains (we've seen folks successfully run LCEL chains with 100s of steps in production). To highlight a few of the reasons you might want to use LCEL:
**First-class streaming support**
- **First-class streaming support:**
When you build your chains with LCEL you get the best possible time-to-first-token (time elapsed until the first chunk of output comes out). For some chains this means, e.g., that we stream tokens straight from an LLM to a streaming output parser, and you get back parsed, incremental chunks of output at the same rate as the LLM provider outputs the raw tokens.
**Async support**
- **Async support:**
Any chain built with LCEL can be called both with the synchronous API (e.g. in your Jupyter notebook while prototyping) as well as with the asynchronous API (e.g. in a [LangServe](/docs/langserve/) server). This enables using the same code for prototypes and in production, with great performance, and the ability to handle many concurrent requests in the same server.
**Optimized parallel execution**
- **Optimized parallel execution:**
Whenever your LCEL chains have steps that can be executed in parallel (e.g. if you fetch documents from multiple retrievers) we automatically do it, both in the sync and the async interfaces, for the smallest possible latency.
**Retries and fallbacks**
- **Retries and fallbacks:**
Configure retries and fallbacks for any part of your LCEL chain. This is a great way to make your chains more reliable at scale. We're currently working on adding streaming support for retries/fallbacks, so you can get the added reliability without any latency cost.
**Access intermediate results**
- **Access intermediate results:**
For more complex chains it's often very useful to access the results of intermediate steps even before the final output is produced. This can be used to let end-users know something is happening, or even just to debug your chain. You can stream intermediate results, and it's available on every [LangServe](/docs/langserve) server.
**Input and output schemas**
- **Input and output schemas:**
Input and output schemas give every LCEL chain Pydantic and JSONSchema schemas inferred from the structure of your chain. This can be used for validation of inputs and outputs, and is an integral part of LangServe.
[**Seamless LangSmith tracing**](https://docs.smith.langchain.com)
- [**Seamless LangSmith tracing**](https://docs.smith.langchain.com)
As your chains get more and more complex, it becomes increasingly important to understand what exactly is happening at every step.
With LCEL, **all** steps are automatically logged to [LangSmith](https://docs.smith.langchain.com/) for maximum observability and debuggability.
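As a minimal sketch of what this looks like in practice (the model name and prompt are illustrative, and we assume `langchain-openai` is installed with an API key configured), the same declarative chain supports sync, async, and streaming calls without any code changes:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Declare the chain once; invoke, stream, batch, and async variants all work on it.
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
model = ChatOpenAI(model="gpt-4o-mini")
chain = prompt | model | StrOutputParser()

chain.invoke({"topic": "bears"})            # synchronous call
# await chain.ainvoke({"topic": "bears"})   # same chain, async
# for chunk in chain.stream({"topic": "bears"}):
#     print(chunk, end="")                  # streamed, parsed output
```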
@@ -186,7 +186,7 @@ For a full list of LangChain model providers with multimodal models, [check out
<span data-heading-keywords="llm,llms"></span>
:::caution
Pure text-in/text-out LLMs tend to be older or lower-level. Many popular models are best used as [chat completion models](/docs/concepts/#chat-models),
Pure text-in/text-out LLMs tend to be older or lower-level. Many new popular models are best used as [chat completion models](/docs/concepts/#chat-models),
even for non-chat use cases.
You are probably looking for [the section above instead](/docs/concepts/#chat-models).
@@ -201,7 +201,7 @@ When messages are passed in as input, they will be formatted into a string under
LangChain does not host any LLMs, rather we rely on third party integrations.
For specifics on how to use LLMs, see the [relevant how-to guides here](/docs/how_to/#llms).
For specifics on how to use LLMs, see the [how-to guides](/docs/how_to/#llms).
### Messages
@@ -215,7 +215,7 @@ LangChain has different message classes for different roles.
The `content` property describes the content of the message.
This can be a few different things:
- A string (most models deal this type of content)
- A string (most models deal with this type of content)
- A List of dictionaries (this is used for multimodal input, where the dictionary contains information about that input type and that input location)
Optionally, messages can have a `name` property which allows for differentiating between multiple speakers with the same role.
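For illustration, here is a rough sketch of constructing messages (the multimodal dictionary format varies by provider, and the image URL is a placeholder):

```python
from langchain_core.messages import HumanMessage, SystemMessage

SystemMessage(content="You are a helpful assistant.")
HumanMessage(content="Hello!", name="alice")  # optional `name` distinguishes speakers

# Multimodal input as a list of dictionaries; exact keys are provider-dependent.
HumanMessage(
    content=[
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ]
)
```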
@@ -365,24 +365,18 @@ See documentation for that [here](/docs/concepts/#function-tool-calling).
:::
Responsible for taking the output of a model and transforming it to a more suitable format for downstream tasks.
`Output parser` is responsible for taking the output of a model and transforming it to a more suitable format for downstream tasks.
Useful when you are using LLMs to generate structured data, or to normalize output from chat models and LLMs.
LangChain has lots of different types of output parsers. This is a list of output parsers LangChain supports. The table below has various pieces of information:
**Name**: The name of the output parser
**Supports Streaming**: Whether the output parser supports streaming.
**Has Format Instructions**: Whether the output parser has format instructions. This is generally available except when (a) the desired schema is not specified in the prompt but rather in other parameters (like OpenAI function calling), or (b) when the OutputParser wraps another OutputParser.
**Calls LLM**: Whether this output parser itself calls an LLM. This is usually only done by output parsers that attempt to correct misformatted output.
**Input Type**: Expected input type. Most output parsers work on both strings and messages, but some (like OpenAI Functions) need a message with specific kwargs.
**Output Type**: The output type of the object returned by the parser.
**Description**: Our commentary on this output parser and when to use it.
- **Name**: The name of the output parser
- **Supports Streaming**: Whether the output parser supports streaming.
- **Has Format Instructions**: Whether the output parser has format instructions. This is generally available except when (a) the desired schema is not specified in the prompt but rather in other parameters (like OpenAI function calling), or (b) when the OutputParser wraps another OutputParser.
- **Calls LLM**: Whether this output parser itself calls an LLM. This is usually only done by output parsers that attempt to correct misformatted output.
- **Input Type**: Expected input type. Most output parsers work on both strings and messages, but some (like OpenAI Functions) need a message with specific kwargs.
- **Output Type**: The output type of the object returned by the parser.
- **Description**: Our commentary on this output parser and when to use it.
| Name | Supports Streaming | Has Format Instructions | Calls LLM | Input Type | Output Type | Description |
|-----------------|--------------------|-------------------------------|-----------|----------------------------------|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -534,10 +528,10 @@ Tools are needed whenever you want a model to control parts of your code or call
A tool consists of:
1. The name of the tool.
2. A description of what the tool does.
3. A JSON schema defining the inputs to the tool.
4. A function (and, optionally, an async variant of the function).
1. The `name` of the tool.
2. A `description` of what the tool does.
3. A `JSON schema` defining the inputs to the tool.
4. A `function` (and, optionally, an async variant of the function).
When a tool is bound to a model, the name, description and JSON schema are provided as context to the model.
Given a list of tools and a set of instructions, a model can request to call one or more tools with specific inputs.
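As a quick sketch, the `@tool` decorator derives all four components from an ordinary function (the `multiply` tool here is just an example):

```python
from langchain_core.tools import tool

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

# The decorator infers the name, description, and JSON schema
# from the function signature and docstring.
print(multiply.name)         # multiply
print(multiply.description)  # Multiply two integers.
print(multiply.args)         # {'a': {...}, 'b': {...}}
```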
@@ -650,14 +644,14 @@ The results of those actions can then be fed back into the agent and it determin
[LangGraph](https://github.com/langchain-ai/langgraph) is an extension of LangChain specifically aimed at creating highly controllable and customizable agents.
Please check out that documentation for a more in-depth overview of agent concepts.
There is a legacy agent concept in LangChain that we are moving towards deprecating: `AgentExecutor`.
There is a legacy `agent` concept in LangChain that we are moving towards deprecating: `AgentExecutor`.
AgentExecutor was essentially a runtime for agents.
It was a great place to get started; however, it was not flexible enough as you started to have more customized agents.
In order to solve that, we built LangGraph to be this flexible, highly-controllable runtime.
If you are still using AgentExecutor, do not fear: we still have a guide on [how to use AgentExecutor](/docs/how_to/agent_executor).
It is recommended, however, that you start to transition to LangGraph.
In order to assist in this we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).
In order to assist in this, we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).
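As a rough sketch of what the LangGraph equivalent looks like (the `magic_function` tool and model name are illustrative, mirroring the migration guide):

```python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

@tool
def magic_function(input: int) -> int:
    """Applies a magic function to an input."""
    return input + 2

model = ChatOpenAI(model="gpt-4o-mini")
agent = create_react_agent(model, [magic_function])

result = agent.invoke(
    {"messages": [("human", "what is the value of magic_function(3)?")]}
)
result["messages"][-1].content
```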
#### ReAct agents
<span data-heading-keywords="react,react agent"></span>
@@ -743,7 +737,7 @@ callbacks to any child objects.
:::important Async in Python<=3.10
Any `RunnableLambda`, `RunnableGenerator`, or `Tool` that invokes other runnables
and is running async in python<=3.10, will have to propagate callbacks to child
and is running `async` in python<=3.10, will have to propagate callbacks to child
objects manually. This is because LangChain cannot automatically propagate
callbacks to child objects in this case.
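A minimal sketch of manual propagation: accept the `config` argument and pass it to any child runnable you invoke (the function itself is illustrative):

```python
from langchain_core.runnables import RunnableConfig, RunnableLambda

async def reverse_and_double(text: str, config: RunnableConfig) -> str:
    # Pass `config` through explicitly so callbacks reach the child runnable;
    # this is required on python<=3.10 and automatic on 3.11+.
    reversed_text = await RunnableLambda(lambda s: s[::-1]).ainvoke(text, config)
    return reversed_text * 2

chain = RunnableLambda(reverse_and_double)
# await chain.ainvoke("abc")  # -> "cbacba"
```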
@@ -873,7 +867,7 @@ Furthermore, using tokens can also improve efficiency, since the model processes
### Function/tool calling
:::info
We use the term tool calling interchangeably with function calling. Although
We use the term `tool calling` interchangeably with `function calling`. Although
function calling is sometimes meant to refer to invocations of a single function,
we treat all models as though they can return multiple tool or function calls in
each message.
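As a short sketch (the `multiply` tool and model name are illustrative; any chat model that supports tool calling exposes `bind_tools`):

```python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

llm_with_tools = ChatOpenAI(model="gpt-4o-mini").bind_tools([multiply])
ai_msg = llm_with_tools.invoke("What is 3 times 12?")
ai_msg.tool_calls
# e.g. [{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': '...', 'type': 'tool_call'}]
```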
@@ -968,7 +962,6 @@ structured_llm.invoke("Tell me a joke about cats")
```
Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=None)
```
We recommend this method as a starting point when working with structured output:
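For context, a schema along these lines (a sketch; the field descriptions are illustrative) produces the `Joke` output shown above via `with_structured_output`:

```python
from typing import Optional

from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

class Joke(BaseModel):
    """Joke to tell the user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating: Optional[int] = Field(default=None, description="How funny it is, from 1 to 10")

structured_llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about cats")
```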
@@ -1107,7 +1100,11 @@ For a full list of model providers that support tool calling, [see this table](/
### Few-shot prompting
One of the most effective ways to improve model performance is to give a model examples of what you want it to do. The technique of adding example inputs and expected outputs to a model prompt is known as "few-shot prompting". There are a few things to think about when doing few-shot prompting:
One of the most effective ways to improve model performance is to give a model examples of
what you want it to do. The technique of adding example inputs and expected outputs
to a model prompt is known as "few-shot prompting". The technique is based on the
[Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) paper.
There are a few things to think about when doing few-shot prompting:
1. How are examples generated?
2. How many examples are in each prompt?
@@ -1182,8 +1179,10 @@ You can see a case study of how Anthropic and OpenAI respond to different few-sh
### Retrieval
LLMs are trained on a large but fixed dataset, limiting their ability to reason over private or recent information. Fine-tuning an LLM with specific facts is one way to mitigate this, but is often [poorly suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise).
Retrieval is the process of providing relevant information to an LLM to improve its response for a given input. Retrieval augmented generation (RAG) is the process of grounding the LLM generation (output) using the retrieved information.
LLMs are trained on a large but fixed dataset, limiting their ability to reason over private or recent information.
Fine-tuning an LLM with specific facts is one way to mitigate this, but is often [poorly suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise).
`Retrieval` is the process of providing relevant information to an LLM to improve its response for a given input.
`Retrieval augmented generation` (`RAG`) ([paper](https://arxiv.org/abs/2005.11401)) is the process of grounding the LLM generation (output) using the retrieved information.
:::tip
@@ -1203,12 +1202,12 @@ First, consider the user input(s) to your RAG system. Ideally, a RAG system can
**Using an LLM to review and optionally modify the input is the central idea behind query translation.** This serves as a general buffer, optimizing raw user inputs for your retrieval system.
For example, this can be as simple as extracting keywords or as complex as generating multiple sub-questions for a complex query.
| Name | When to use | Description |
|---------------|-------------|-------------|
| Name | When to use | Description |
|---------------|-------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [Multi-query](/docs/how_to/MultiQueryRetriever/) | When you need to cover multiple perspectives of a question. | Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, return the unique documents for all queries. |
| [Decomposition](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a question can be broken down into smaller subproblems. | Decompose a question into a set of subproblems / questions, which can either be solved sequentially (use the answer from first + retrieval to answer the second) or in parallel (consolidate each answer into final answer). |
| [Step-back](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a higher-level conceptual understanding is required. | First prompt the LLM to ask a generic step-back question about higher-level concepts or principles, and retrieve relevant facts about them. Use this grounding to help answer the user question. |
| [HyDE](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | If you have challenges retrieving relevant documents using the raw user inputs. | Use an LLM to convert questions into hypothetical documents that answer the question. Use the embedded hypothetical documents to retrieve real documents with the premise that doc-doc similarity search can produce more relevant matches. |
| [Decomposition](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a question can be broken down into smaller subproblems. | Decompose a question into a set of subproblems / questions, which can either be solved sequentially (use the answer from first + retrieval to answer the second) or in parallel (consolidate each answer into final answer). |
| [Step-back](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a higher-level conceptual understanding is required. | First prompt the LLM to ask a generic step-back question about higher-level concepts or principles, and retrieve relevant facts about them. Use this grounding to help answer the user question. [Paper](https://arxiv.org/pdf/2310.06117). |
| [HyDE](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | If you have challenges retrieving relevant documents using the raw user inputs. | Use an LLM to convert questions into hypothetical documents that answer the question. Use the embedded hypothetical documents to retrieve real documents with the premise that doc-doc similarity search can produce more relevant matches. [Paper](https://arxiv.org/abs/2212.10496). |
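For example, a minimal multi-query sketch (here `vectorstore` stands in for any LangChain vector store and `model` for any chat model; both are assumptions):

```python
from langchain.retrievers.multi_query import MultiQueryRetriever

# `vectorstore` is any LangChain vector store; `model` is any chat model.
retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(), llm=model
)
docs = retriever.invoke("What are the approaches to task decomposition?")
```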
:::tip
@@ -1282,11 +1281,11 @@ Fifth, consider ways to improve the quality of your similarity search itself. Em
There are some additional tricks to improve the quality of your retrieval. Embeddings excel at capturing semantic information, but may struggle with keyword-based queries. Many [vector stores](/docs/integrations/retrievers/pinecone_hybrid_search/) offer built-in [hybrid-search](https://docs.pinecone.io/guides/data/understanding-hybrid-search) to combine keyword and semantic similarity, which marries the benefits of both approaches. Furthermore, many vector stores have [maximal marginal relevance](https://python.langchain.com/v0.1/docs/modules/model_io/prompts/example_selectors/mmr/), which attempts to diversify the results of a search to avoid returning similar and redundant documents.
| Name | When to use | Description |
|-------------------|----------------------------------------------------------|-------------|
| [ColBERT](/docs/integrations/providers/ragatouille/#using-colbert-as-a-reranker) | When higher granularity embeddings are needed. | ColBERT uses contextually influenced embeddings for each token in the document and query to get a granular query-document similarity score. |
| [Hybrid search](/docs/integrations/retrievers/pinecone_hybrid_search/) | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches. |
| [Maximal Marginal Relevance (MMR)](/docs/integrations/vectorstores/pinecone/#maximal-marginal-relevance-searches) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |
| Name | When to use | Description |
|-------------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [ColBERT](/docs/integrations/providers/ragatouille/#using-colbert-as-a-reranker) | When higher granularity embeddings are needed. | ColBERT uses contextually influenced embeddings for each token in the document and query to get a granular query-document similarity score. [Paper](https://arxiv.org/abs/2112.01488). |
| [Hybrid search](/docs/integrations/retrievers/pinecone_hybrid_search/) | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches. [Paper](https://arxiv.org/abs/2210.11934). |
| [Maximal Marginal Relevance (MMR)](/docs/integrations/vectorstores/pinecone/#maximal-marginal-relevance-searches) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |
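For instance, MMR is typically just a retriever setting (a sketch; `vectorstore` again stands in for any vector store that supports MMR):

```python
# `vectorstore` is any LangChain vector store that supports MMR search.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 4, "fetch_k": 20},  # return 4 diverse docs chosen from 20 candidates
)
docs = retriever.invoke("How do I improve retrieval diversity?")
```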
:::tip
@@ -1306,7 +1305,7 @@ Sixth, consider ways to filter or rank retrieved documents. This is very useful
:::tip
See our RAG from Scratch video on [RAG-Fusion](https://youtu.be/77qELPbNgxA?feature=shared), an approach for post-processing across multiple queries: Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, and combine the ranks of multiple search result lists to produce a single, unified ranking with [Reciprocal Rank Fusion (RRF)](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1).
See our RAG from Scratch video on [RAG-Fusion](https://youtu.be/77qELPbNgxA?feature=shared) ([paper](https://arxiv.org/abs/2402.03367)), an approach for post-processing across multiple queries: Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, and combine the ranks of multiple search result lists to produce a single, unified ranking with [Reciprocal Rank Fusion (RRF)](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1).
:::
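As a self-contained sketch of Reciprocal Rank Fusion itself (the constant `k=60` follows the original RRF paper; the doc ids are illustrative):

```python
from collections import defaultdict

def reciprocal_rank_fusion(ranked_lists, k=60):
    """Fuse several ranked lists of doc ids into a single ranking."""
    scores = defaultdict(float)
    for ranking in ranked_lists:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] += 1.0 / (k + rank)  # each list contributes 1/(k + rank)
    return sorted(scores, key=scores.get, reverse=True)

reciprocal_rank_fusion([["a", "b", "c"], ["b", "a", "d"]])
# -> ['a', 'b', 'c', 'd']  (ties broken by first appearance)
```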

View File

@@ -24,3 +24,16 @@ for more information.
Notably, Github doesn't allow this setting to be enabled for forks in **organizations** ([issue](https://github.com/orgs/community/discussions/5634)).
If you are working in an organization, we recommend submitting your PR from a personal
fork in order to enable this setting.
### Why hasn't my PR been reviewed?
Please reference our [Review Process](/docs/contributing/review_process/).
### Why was my PR closed?
Please reference our [Review Process](/docs/contributing/review_process/).
### I think my PR was closed in a way that didn't follow the review process. What should I do?
Tag `@efriis` in the PR comments referencing the portion of the review
process that you believe was not followed. We'll take a look!

View File

@@ -0,0 +1,95 @@
# Review Process
## Overview
This document outlines the process used by the LangChain maintainers for reviewing pull requests (PRs). The primary objective of this process is to enhance the LangChain developer experience.
## Review Statuses
We categorize PRs using three main statuses, which are marked as project item statuses in the right sidebar and can be viewed in detail [here](https://github.com/orgs/langchain-ai/projects/12/views/1).
- **Triage**:
- Initial status for all newly submitted PRs.
- Requires a maintainer to categorize it into one of the other statuses.
- **Needs Support**:
- PRs that require community feedback or additional input before moving forward.
- Automatically promoted to the backlog if it receives 5 upvotes.
- An auto-comment is generated when this status is applied, explaining the flow and the upvote requirement.
- If the PR remains in this status for 25 days, it will be marked as “stale” via auto-comment.
- PRs will be auto-closed after 30 days if no further action is taken.
- **In Review**:
- PRs that are actively under review by our team.
- These are regularly reviewed and monitored.
**Note:** A PR may only have one status at a time.
**Note:** You may notice 3 additional statuses of Done, Closed, and Internal that
are external to this lifecycle. Done and Closed PRs have been merged or closed,
respectively. Internal is for PRs submitted by core maintainers, and these PRs are owned
by the submitter.
## Review Guidelines
1. **PRs that touch /libs/core**:
- PRs that directly impact core code and are likely to affect end users.
- **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
- These PRs are given top priority and are reviewed the fastest.
- PRs that don't have a **concise** description of their motivation (either in the PR summary or in a linked issue) are likely to be closed without an in-depth review. Please do not generate verbose PR descriptions with an LLM.
- PRs that don't have unit tests are likely to be closed.
- Feature requests should first be opened as a GitHub issue and discussed with the LangChain maintainers. Large PRs submitted without prior discussion are likely to be closed.
2. **PRs that touch /libs/langchain**:
- High-impact PRs that are closely related to core PRs but slightly lower in priority.
- **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
- These are reviewed and closed aggressively, similar to core PRs.
- New feature requests should be discussed with the core maintainer team beforehand in an issue.
3. **PRs that touch /libs/partners/****:
- PRs involving integration packages.
- **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
- The review may be conducted by our team or handed off to the partner's development team, depending on the PR's content.
- We maintain communication lines with most partner dev teams to facilitate this process.
4. **Community PRs**:
- Most community PRs will get an initial status of "needs support".
- **Triage Guideline**: most PRs should go to `Needs support`. Bugfixes on high-traffic integrations should go straight to `In review`.
- **Triage Guideline**: all new features and integrations should go to `Needs support` and will be closed if they do not get enough support (measured by upvotes or comments).
- PRs in the `Needs Support` status for 20 days are marked as “stale” and will be closed after 30 days if no action is taken.
5. **Documentation PRs**:
- PRs that touch the documentation content in docs/docs.
- **Triage Guideline**:
- PRs that fix typos or small errors in a single file and pass CI should go straight to `In Review`.
- PRs that make changes that have been discussed and agreed upon in an issue should go straight to `In Review`.
- PRs that add new pages or change the structure of the documentation should go to `Needs Support`.
- We strive to standardize documentation formats to streamline the review process.
- CI jobs run against documentation to ensure adherence to standards, automating much of the review.
6. **PRs must be in English**:
- PRs that are not in English will be closed without review.
- This is to ensure that all maintainers can review the PRs effectively.
## How to see a PR's status
See screenshot:
![PR Status](/img/review_process_status.png)
*To see the status of all open PRs, please visit the [LangChain Project Board](https://github.com/orgs/langchain-ai/projects/12/views/2).*
## Review Prioritization
Our goal is to provide the best possible development experience by focusing on making software that:
- Works: Works as intended (is bug-free).
- Is useful: Improves LLM app development with components that work off-the-shelf and runtimes that simplify app building.
- Is easy: Is intuitive to use and well-documented.
We believe this process reflects our priorities and are open to feedback if you feel it does not.
## Github Discussion
We welcome your feedback on this process. Please feel free to add a comment in
[this GitHub Discussion](https://github.com/langchain-ai/langchain/discussions/25920).

View File

@@ -9,12 +9,12 @@
"\n",
"When constructing an agent, you will need to provide it with a list of `Tool`s that it can use. Besides the actual function that is called, the Tool consists of several components:\n",
"\n",
"| Attribute | Type | Description |\n",
"|-----------------|---------------------------|------------------------------------------------------------------------------------------------------------------|\n",
"| name | str | Must be unique within a set of tools provided to an LLM or agent. |\n",
"| description | str | Describes what the tool does. Used as context by the LLM or agent. |\n",
"| args_schema | Pydantic BaseModel | Optional but recommended, can be used to provide more information (e.g., few-shot examples) or validation for expected parameters |\n",
"| return_direct | boolean | Only relevant for agents. When True, after invoking the given tool, the agent will stop and return the result direcly to the user. |\n",
"| Attribute | Type | Description |\n",
"|---------------|---------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n",
"| name | str | Must be unique within a set of tools provided to an LLM or agent. |\n",
"| description | str | Describes what the tool does. Used as context by the LLM or agent. |\n",
"| args_schema | langchain.pydantic_v1.BaseModel | Optional but recommended, and required if using callback handlers. It can be used to provide more information (e.g., few-shot examples) or validation for expected parameters. |\n",
"| return_direct | boolean | Only relevant for agents. When True, after invoking the given tool, the agent will stop and return the result direcly to the user. |\n",
"\n",
"LangChain supports the creation of tools from:\n",
"\n",

View File

@@ -8,7 +8,7 @@ The Embeddings class is a class designed for interfacing with text embedding mod
Embeddings create a vector representation of a piece of text. This is useful because it means we can think about text in the vector space, and do things like semantic search where we look for pieces of text that are most similar in the vector space.
The base Embeddings class in LangChain provides two methods: one for embedding documents and one for embedding a query. The former, `.embed_documents`, takes as input multiple texts, while the latter, `.embed_query`, takes a single text. The reason for having these as two separate methods is that some embedding providers have different embedding methods for documents (to be searched over) vs queries (the search query itself).
`.embed_query` will return a list of floats, whereas `.embed_documents` returns a list of lists of floats.
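For illustration, a short sketch of the two methods (assumes `langchain-huggingface` is installed; the model is downloaded on first use):

```python
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Embed a batch of documents (a list of vectors) vs. a single query (one vector).
doc_vectors = embeddings_model.embed_documents(["hello world", "goodbye world"])
query_vector = embeddings_model.embed_query("a friendly greeting")

len(doc_vectors), len(doc_vectors[0]), len(query_vector)  # (2, 768, 768)
```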
## Get started
@@ -94,15 +94,6 @@ from langchain_huggingface import HuggingFaceEmbeddings
embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
```
You can also leave the `model_name` blank to use the default [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) model.
```python
from langchain_huggingface import HuggingFaceEmbeddings
embeddings_model = HuggingFaceEmbeddings()
```
</TabItem>
</Tabs>

View File

@@ -9,7 +9,7 @@ functionality to install.
## Official release
To install the main LangChain package, run:
To install the main `langchain` package, run:
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
@@ -26,8 +26,7 @@ import CodeBlock from "@theme/CodeBlock";
While this package acts as a sane starting point to using LangChain,
much of the value of LangChain comes when integrating it with various model providers, datastores, etc.
By default, the dependencies needed to do that are NOT installed. You will need to install the dependencies for specific integrations separately.
We'll show how to do that in the next sections of this guide.
By default, the dependencies needed to do that are NOT installed. You will need to install the dependencies for specific integrations separately, which we show below.
## Ecosystem packages
@@ -41,14 +40,6 @@ When installing a package, you do not need to explicitly install that package's
However, you may choose to if you are using a feature only available in a certain version of that dependency.
If you do, you should make sure that the installed or pinned version is compatible with any other integration packages you use.
### From source
If you want to install from source, you can do so by cloning the repo and be sure that the directory is `PATH/TO/REPO/langchain/libs/langchain` running:
```bash
pip install -e .
```
### LangChain core
The `langchain-core` package contains base abstractions that the rest of the LangChain ecosystem uses, along with the LangChain Expression Language. It is automatically installed by `langchain`, but can also be used separately. Install with:
@@ -56,8 +47,18 @@ The `langchain-core` package contains base abstractions that the rest of the Lan
pip install langchain-core
```
### LangChain community
The `langchain-community` package contains third-party integrations. Install with:
### Integration packages
Certain integrations like OpenAI and Anthropic have their own packages.
Any integrations that require their own package will be documented as such in the [Integration docs](/docs/integrations/platforms/).
You can see a list of all integration packages in the [API reference](https://api.python.langchain.com) under the "Partner libs" dropdown.
To install one of these run:
```bash
pip install langchain-openai
```
Any integrations that haven't been split out into their own packages will live in the `langchain-community` package. Install with:
```bash
pip install langchain-community
@@ -89,7 +90,7 @@ pip install "langserve[all]"
```
for both client and server dependencies. Or `pip install "langserve[client]"` for client code, and `pip install "langserve[server]"` for server code.
## LangChain CLI
### LangChain CLI
The LangChain CLI is useful for working with LangChain templates and other LangServe projects.
Install with:
@@ -105,3 +106,13 @@ If you are not using LangChain, you can install it with:
```bash
pip install langsmith
```
### From source
If you want to install a package from source, you can do so by cloning the [main LangChain repo](https://github.com/langchain-ai/langchain), entering the directory of the package you want to install (`PATH/TO/REPO/langchain/libs/{package}`), and running:
```bash
pip install -e .
```
LangGraph, LangSmith SDK, and certain integration packages live outside the main LangChain repo. You can see [all repos here](https://github.com/langchain-ai).

View File

@@ -82,7 +82,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "1e425fea-2796-4b99-bee6-9a6ffe73f756",
"metadata": {},
"outputs": [],
@@ -115,7 +115,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "03ea357c-9c36-4464-b2cc-27bd150e1554",
"metadata": {},
"outputs": [
@@ -126,7 +126,7 @@
" 'output': 'The value of `magic_function(3)` is 5.'}"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -162,7 +162,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "53a3737a-d167-4255-89bf-20ac37f89a3e",
"metadata": {},
"outputs": [
@@ -173,7 +173,7 @@
" 'output': 'The value of `magic_function(3)` is 5.'}"
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -193,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "74ecebe3-512e-409c-a661-bdd5b0a2b782",
"metadata": {},
"outputs": [
@@ -201,10 +201,10 @@
"data": {
"text/plain": [
"{'input': 'Pardon?',\n",
" 'output': 'The value you get when you apply `magic_function` to the input 3 is 5.'}"
" 'output': 'The value returned by `magic_function` when the input is 3 is 5.'}"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -243,7 +243,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "a9a11ccd-75e2-4c11-844d-a34870b0ff91",
"metadata": {},
"outputs": [
@@ -254,7 +254,7 @@
" 'output': 'El valor de `magic_function(3)` es 5.'}"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -295,7 +295,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "a9486805-676a-4d19-a5c4-08b41b172989",
"metadata": {},
"outputs": [],
@@ -324,7 +324,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "d369ab45-0c82-45f4-9d3e-8efb8dd47e2c",
"metadata": {},
"outputs": [
@@ -332,7 +332,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'input': 'what is the value of magic_function(3)?', 'output': 'El valor de magic_function(3) es 5. ¡Pandamonium!'}\n"
"{'input': 'what is the value of magic_function(3)?', 'output': 'The value of magic_function(3) is 5. ¡Pandamonium!'}\n"
]
}
],
@@ -386,7 +386,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "b97beba5-8f74-430c-9399-91b77c8fa15c",
"metadata": {},
"outputs": [
@@ -394,11 +394,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Hi Polly! The output of the magic function for the input 3 is 5.\n",
"Hi Polly! The output of applying the magic function to the input 3 is 5.\n",
"---\n",
"Yes, your name is Polly!\n",
"Yes, you mentioned your name is Polly.\n",
"---\n",
"The output of the magic function for the input 3 is 5.\n"
"The output of applying the magic function to the input 3 is 5.\n"
]
}
],
@@ -476,7 +476,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "baca3dc6-678b-4509-9275-2fd653102898",
"metadata": {},
"outputs": [
@@ -484,16 +484,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Hi Polly! The output of the magic_function for the input of 3 is 5.\n",
"Hi Polly! The output of applying the magic function to the input 3 is 5.\n",
"---\n",
"Yes, your name is Polly!\n",
"---\n",
"The output of the magic_function for the input of 3 was 5.\n"
"The output of applying the magic function to the input 3 was 5.\n"
]
}
],
"source": [
"from langgraph.checkpoint import MemorySaver # an in-memory checkpointer\n",
"from langgraph.checkpoint.memory import MemorySaver # an in-memory checkpointer\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"system_message = \"You are a helpful assistant.\"\n",
@@ -544,7 +544,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "e62843c4-1107-41f0-a50b-aea256e28053",
"metadata": {},
"outputs": [
@@ -552,8 +552,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'actions': [ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_1exy0rScfPmo4fy27FbQ5qJ2')], 'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])]}\n",
"{'steps': [AgentStep(action=ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_1exy0rScfPmo4fy27FbQ5qJ2'), observation=5)], 'messages': [FunctionMessage(content='5', name='magic_function')]}\n",
"{'actions': [ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_gNzQT96XWoyZqVl1jI1yMnjy')], 'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])]}\n",
"{'steps': [AgentStep(action=ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_gNzQT96XWoyZqVl1jI1yMnjy'), observation=5)], 'messages': [FunctionMessage(content='5', name='magic_function')]}\n",
"{'output': 'The value of `magic_function(3)` is 5.', 'messages': [AIMessage(content='The value of `magic_function(3)` is 5.')]}\n"
]
}
@@ -604,7 +604,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"id": "076ebc85-f804-4093-a25a-a16334c9898e",
"metadata": {},
"outputs": [
@@ -612,9 +612,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_my9rzFSKR4T1yYKwCsfbZB8A', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 61, 'total_tokens': 75}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_bc2a86f5f5', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-dd705555-8fae-4fb1-a033-5d99a23e3c22-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_my9rzFSKR4T1yYKwCsfbZB8A', 'type': 'tool_call'}], usage_metadata={'input_tokens': 61, 'output_tokens': 14, 'total_tokens': 75})]}}\n",
"{'tools': {'messages': [ToolMessage(content='5', name='magic_function', tool_call_id='call_my9rzFSKR4T1yYKwCsfbZB8A')]}}\n",
"{'agent': {'messages': [AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 84, 'total_tokens': 98}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-698cad05-8cb2-4d08-8c2a-881e354f6cc7-0', usage_metadata={'input_tokens': 84, 'output_tokens': 14, 'total_tokens': 98})]}}\n"
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_I0nztlIcc0e9ry5dn53YLZUM', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 61, 'total_tokens': 75}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-5f9bd87d-3692-4d13-8d27-1859e13e2156-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_I0nztlIcc0e9ry5dn53YLZUM', 'type': 'tool_call'}], usage_metadata={'input_tokens': 61, 'output_tokens': 14, 'total_tokens': 75})]}}\n",
"{'tools': {'messages': [ToolMessage(content='5', name='magic_function', tool_call_id='call_I0nztlIcc0e9ry5dn53YLZUM')]}}\n",
"{'agent': {'messages': [AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 84, 'total_tokens': 98}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-f6015ca6-93e5-45e8-8b28-b3f0a8d203dc-0', usage_metadata={'input_tokens': 84, 'output_tokens': 14, 'total_tokens': 98})]}}\n"
]
}
],
@@ -654,7 +654,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 14,
"id": "a2f720f3-c121-4be2-b498-92c16bb44b0a",
"metadata": {},
"outputs": [
@@ -662,7 +662,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-a792db4a-278d-4090-82ae-904a30eada93', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_uPZ2D1Bo5mdED3gwgaeWURrf'), 5)]\n"
"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-99e06b70-1ef6-4761-834b-87b6c5252e20', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_wjaAyTjI2LSYOq7C8QZYSxEs'), 5)]\n"
]
}
],
@@ -684,20 +684,20 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 15,
"id": "ef23117a-5ccb-42ce-80c3-ea49a9d3a942",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='cd7d0f49-a0e0-425a-b2b0-603a716058ed'),\n",
" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_VfZ9287DuybOSrBsQH5X12xf', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a1e965cd-bf61-44f9-aec1-8aaecb80955f-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_VfZ9287DuybOSrBsQH5X12xf', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}),\n",
" ToolMessage(content='5', name='magic_function', id='20d5c2fe-a5d8-47fa-9e04-5282642e2039', tool_call_id='call_VfZ9287DuybOSrBsQH5X12xf'),\n",
" AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 78, 'total_tokens': 92}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-abf9341c-ef41-4157-935d-a3be5dfa2f41-0', usage_metadata={'input_tokens': 78, 'output_tokens': 14, 'total_tokens': 92})]}"
"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='2d369331-8052-4167-bd85-9f6d8ad021ae'),\n",
" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_oXiSQSe6WeWj7XIKXxZrO2IC', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-297e7fc9-726f-46a0-8c67-dc28ed1724d0-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_oXiSQSe6WeWj7XIKXxZrO2IC', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}),\n",
" ToolMessage(content='5', name='magic_function', id='46370faf-9598-423c-b94b-aca8cb4f035d', tool_call_id='call_oXiSQSe6WeWj7XIKXxZrO2IC'),\n",
" AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 78, 'total_tokens': 92}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-f48efaff-0c2c-4632-bbf9-7ee626f73d02-0', usage_metadata={'input_tokens': 78, 'output_tokens': 14, 'total_tokens': 92})]}"
]
},
"execution_count": 13,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -757,7 +757,7 @@
"Invoking: `magic_function` with `{'input': '3'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3mParece que hubo un error al intentar calcular el valor de la función mágica. ¿Te gustaría que lo intente de nuevo?\u001b[0m\n",
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3mHubo un error al intentar obtener el valor de `magic_function(3)`. ¿Podrías intentarlo de nuevo o proporcionar más detalles?\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@@ -766,7 +766,7 @@
"data": {
"text/plain": [
"{'input': 'what is the value of magic_function(3)?',\n",
" 'output': 'Parece que hubo un error al intentar calcular el valor de la función mágica. ¿Te gustaría que lo intente de nuevo?'}"
" 'output': 'Hubo un error al intentar obtener el valor de `magic_function(3)`. ¿Podrías intentarlo de nuevo o proporcionar más detalles?'}"
]
},
"execution_count": 17,
@@ -819,12 +819,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"content='what is the value of magic_function(3)?' id='74e2d5e8-2b59-4820-979c-8d11ecfc14c2'\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_ihtrH6IG95pDXpKluIwAgi3J', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-5a35e465-8a08-43dd-ac8b-4a76dcace305-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_ihtrH6IG95pDXpKluIwAgi3J', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='8c37c19b-3586-46b1-aab9-a045786801a2' tool_call_id='call_ihtrH6IG95pDXpKluIwAgi3J'\n",
"content='It seems there was an error in processing the request. Let me try again.' additional_kwargs={'tool_calls': [{'id': 'call_iF0vYWAd6rfely0cXSqdMOnF', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 88, 'total_tokens': 119}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-eb88ec77-d492-43a5-a5dd-4cefef9a6920-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_iF0vYWAd6rfely0cXSqdMOnF', 'type': 'tool_call'}] usage_metadata={'input_tokens': 88, 'output_tokens': 31, 'total_tokens': 119}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='c9ff261f-a0f1-4c92-a9f2-cd749f62d911' tool_call_id='call_iF0vYWAd6rfely0cXSqdMOnF'\n",
"content='I am currently unable to process the request with the input \"3\" for the `magic_function`. If you have any other questions or need assistance with something else, please let me know!' response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 141, 'total_tokens': 180}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None} id='run-d42508aa-f286-4b57-80fb-f8a76736d470-0' usage_metadata={'input_tokens': 141, 'output_tokens': 39, 'total_tokens': 180}\n"
"content='what is the value of magic_function(3)?' id='fe74bb30-45b8-4a40-a5ed-fd6678da5428'\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_TNKfNy6fgZNdJAvHUMXwtp8f', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-dad8bfc1-477c-40d2-9016-243d25c0dd13-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_TNKfNy6fgZNdJAvHUMXwtp8f', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='653226e0-3187-40be-a774-4c7c2612239e' tool_call_id='call_TNKfNy6fgZNdJAvHUMXwtp8f'\n",
"content='It looks like there was an issue with processing the request. Let me try that again.' additional_kwargs={'tool_calls': [{'id': 'call_K0wJ8fQLYGv8fYXY1Uo5U5sG', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 88, 'total_tokens': 121}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-d4c85437-6625-4e57-81f9-86de6842be7b-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_K0wJ8fQLYGv8fYXY1Uo5U5sG', 'type': 'tool_call'}] usage_metadata={'input_tokens': 88, 'output_tokens': 33, 'total_tokens': 121}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='9b530d03-95df-401e-bb4f-5cada1195033' tool_call_id='call_K0wJ8fQLYGv8fYXY1Uo5U5sG'\n",
"content='It seems that there is a persistent issue with processing the request. Let me attempt it one more time.' additional_kwargs={'tool_calls': [{'id': 'call_7ECwwNBDo4SH56oczErZJVRT', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 143, 'total_tokens': 179}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-9f3f651e-a641-4112-99ed-d1ac11169582-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_7ECwwNBDo4SH56oczErZJVRT', 'type': 'tool_call'}] usage_metadata={'input_tokens': 143, 'output_tokens': 36, 'total_tokens': 179}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='e4cd152b-4eb1-47df-ac76-f88e79adbe19' tool_call_id='call_7ECwwNBDo4SH56oczErZJVRT'\n",
"content=\"It seems there is a consistent issue with processing the request for the magic function. Let's try using a different approach to resolve this.\" additional_kwargs={'tool_calls': [{'id': 'call_DMAL0UwBRijzuPjCTSwR2r17', 'function': {'arguments': '{\"input\":\"three\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 41, 'prompt_tokens': 201, 'total_tokens': 242}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-cd9f4e5c-f881-462c-abe3-890e73f46a01-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 'three'}, 'id': 'call_DMAL0UwBRijzuPjCTSwR2r17', 'type': 'tool_call'}] usage_metadata={'input_tokens': 201, 'output_tokens': 41, 'total_tokens': 242}\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
]
}
],
@@ -939,9 +942,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_FKiTkTd0Ffd4rkYSzERprf1M', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-b842f7b6-ec10-40f8-8c0e-baa220b77e91-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_FKiTkTd0Ffd4rkYSzERprf1M', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_o8Ym0u9UfzArhIm1lV7O0CXF', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d9faf125-1ff8-4de2-a75b-97e07d28dc4d-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_o8Ym0u9UfzArhIm1lV7O0CXF', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"------\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to a step timeout.'}\n"
]
}
],
@@ -957,7 +960,7 @@
" print(chunk)\n",
" print(\"------\")\n",
"except TimeoutError:\n",
" print({\"input\": query, \"output\": \"Agent stopped due to max iterations.\"})"
" print({\"input\": query, \"output\": \"Agent stopped due to a step timeout.\"})"
]
},
{
@@ -978,7 +981,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_WoOB8juagB08xrP38twYlYKR', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-73dee47e-30ab-42c9-bb0c-6f227cac96cd-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_WoOB8juagB08xrP38twYlYKR', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_gsGzyhyvR25iNV6W9VR2TIdQ', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-9ad8f834-06c5-41cf-9eec-6b7e0f5e777e-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_gsGzyhyvR25iNV6W9VR2TIdQ', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"------\n",
"Task Cancelled.\n"
]
@@ -1089,10 +1092,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"content='what is the value of magic_function(3)?' id='4fa7fbe5-758c-47a3-9268-717665d10680'\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_ujE0IQBbIQnxcF9gsZXQfdhF', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-65d689aa-baee-4342-a5d2-048feefab418-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_ujE0IQBbIQnxcF9gsZXQfdhF', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
"content='Sorry there was an error, please try again.' name='magic_function' id='ef8ddf1d-9ad7-4ac0-b784-b673c4d94bbd' tool_call_id='call_ujE0IQBbIQnxcF9gsZXQfdhF'\n",
"content='It seems there was an issue with the previous attempt. Let me try that again.' additional_kwargs={'tool_calls': [{'id': 'call_GcsAfCFUHJ50BN2IOWnwTbQ7', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 87, 'total_tokens': 119}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-54527c4b-8ff0-4ee8-8abf-224886bd222e-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_GcsAfCFUHJ50BN2IOWnwTbQ7', 'type': 'tool_call'}] usage_metadata={'input_tokens': 87, 'output_tokens': 32, 'total_tokens': 119}\n",
"content='what is the value of magic_function(3)?' id='6487a942-0a9a-4e8a-9556-553a45fa9c5a'\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_pe5KVY5No9iT4JWqrm5MwL1D', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-04147325-fb72-462a-a1d9-6aa4e86e3d8a-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_pe5KVY5No9iT4JWqrm5MwL1D', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
"content='Sorry there was an error, please try again.' name='magic_function' id='bc0bf58f-7c6c-42ed-a96d-a2afa79f16a9' tool_call_id='call_pe5KVY5No9iT4JWqrm5MwL1D'\n",
"content=\"It seems there was an issue with processing the request. I'll try again.\" additional_kwargs={'tool_calls': [{'id': 'call_5rV7k3g7oW38bD9KUTsSxK8l', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 87, 'total_tokens': 117}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-6e43ffd4-fb6f-4222-8503-a50ae268c0be-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_5rV7k3g7oW38bD9KUTsSxK8l', 'type': 'tool_call'}] usage_metadata={'input_tokens': 87, 'output_tokens': 30, 'total_tokens': 117}\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
]
}
@@ -1322,7 +1325,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.11.2"
}
},
"nbformat": 4,

View File

@@ -14,7 +14,7 @@
"- [How to use a model to call tools](/docs/how_to/tool_calling)\n",
":::\n",
"\n",
"In order to force our LLM to spelect a specific tool, we can use the `tool_choice` parameter to ensure certain behavior. First, let's define our model and tools:"
"In order to force our LLM to select a specific tool, we can use the `tool_choice` parameter to ensure certain behavior. First, let's define our model and tools:"
]
},
{

View File

@@ -0,0 +1,423 @@
{
"cells": [
{
"cell_type": "raw",
"id": "afaf8039",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Cerebras\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "e49f1e0d",
"metadata": {},
"source": [
"# ChatCerebras\n",
"\n",
"This notebook provides a quick overview for getting started with Cerebras [chat models](/docs/concepts/#chat-models). For detailed documentation of all ChatCerebras features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_cerebras.chat_models.ChatCerebras.html).\n",
"\n",
"At Cerebras, we've developed the world's largest and fastest AI processor, the Wafer-Scale Engine-3 (WSE-3). The Cerebras CS-3 system, powered by the WSE-3, represents a new class of AI supercomputer that sets the standard for generative AI training and inference with unparalleled performance and scalability.\n",
"\n",
"With Cerebras as your inference provider, you can:\n",
"- Achieve unprecedented speed for AI inference workloads\n",
"- Build commercially with high throughput\n",
"- Effortlessly scale your AI workloads with our seamless clustering technology\n",
"\n",
"Our CS-3 systems can be quickly and easily clustered to create the largest AI supercomputers in the world, making it simple to place and run the largest models. Leading corporations, research institutions, and governments are already using Cerebras solutions to develop proprietary models and train popular open-source models.\n",
"\n",
"Want to experience the power of Cerebras? Check out our [website](https://cerebras.ai) for more resources and explore options for accessing our technology through the Cerebras Cloud or on-premise deployments!\n",
"\n",
"For more information about Cerebras Cloud, visit [cloud.cerebras.ai](https://cloud.cerebras.ai/). Our API reference is available at [inference-docs.cerebras.ai](https://inference-docs.cerebras.ai/).\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/chat/cerebras) | Package downloads | Package latest |\n",
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
"| [ChatCerebras](https://api.python.langchain.com/en/latest/chat_models/langchain_cerebras.chat_models.ChatCerebras.html) | [langchain-cerebras](https://api.python.langchain.com/en/latest/cerebras_api_reference.html) | ❌ | beta | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-cerebras?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-cerebras?style=flat-square&label=%20) |\n",
"\n",
"### Model features\n",
"| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
"| ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"```bash\n",
"pip install langchain-cerebras\n",
"```\n",
"\n",
"### Credentials\n",
"\n",
"Get an API Key from [cloud.cerebras.ai](https://cloud.cerebras.ai/) and add it to your environment variables:\n",
"```\n",
"export CEREBRAS_API_KEY=\"your-api-key-here\"\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ce19c2d6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Enter your Cerebras API key: ········\n"
]
}
],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"CEREBRAS_API_KEY\"] = getpass.getpass(\"Enter your Cerebras API key: \")"
]
},
{
"cell_type": "markdown",
"id": "72ee0c4b-9764-423a-9dbf-95129e185210",
"metadata": {},
"source": [
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"The LangChain Cerebras integration lives in the `langchain-cerebras` package:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-cerebras"
]
},
{
"cell_type": "markdown",
"id": "ea69675d",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our model object and generate chat completions:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21155898",
"metadata": {},
"outputs": [],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")"
]
},
{
"cell_type": "markdown",
"id": "2b4f3e15",
"metadata": {},
"source": [
"## Invocation"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "62e0dbc3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='Je adore le programmation.', response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 35, 'total_tokens': 42}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_be27ec77ff', 'finish_reason': 'stop'}, id='run-e5d66faf-019c-4ac6-9265-71093b13202d-0', usage_metadata={'input_tokens': 35, 'output_tokens': 7, 'total_tokens': 42})"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"messages = [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
" ),\n",
" (\"human\", \"I love programming.\"),\n",
"]\n",
"ai_msg = llm.invoke(messages)\n",
"ai_msg"
]
},
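{
"cell_type": "markdown",
"id": "tool-calling-sketch",
"metadata": {},
"source": [
"As the feature table above indicates, `ChatCerebras` supports [tool calling](/docs/how_to/tool_calling/). A minimal sketch (the `get_weather` tool below is illustrative, not part of the package):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "tool-calling-sketch-code",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.tools import tool\n",
"\n",
"\n",
"@tool\n",
"def get_weather(city: str) -> str:\n",
"    \"\"\"Get the weather for a city.\"\"\"\n",
"    # Illustrative stub: a real tool would call a weather API.\n",
"    return f\"It is sunny in {city}.\"\n",
"\n",
"\n",
"# bind_tools attaches the tool schema to the model's requests\n",
"llm_with_tools = llm.bind_tools([get_weather])\n",
"ai_msg = llm_with_tools.invoke(\"What is the weather in Paris?\")\n",
"ai_msg.tool_calls"
]
},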
{
"cell_type": "markdown",
"id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
"metadata": {},
"source": [
"## Chaining\n",
"\n",
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='Ich liebe Programmieren!\\n\\n(Literally: I love programming!)', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 30, 'total_tokens': 44}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_be27ec77ff', 'finish_reason': 'stop'}, id='run-e1d2ebb8-76d1-471b-9368-3b68d431f16a-0', usage_metadata={'input_tokens': 30, 'output_tokens': 14, 'total_tokens': 44})"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
" ),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | llm\n",
"chain.invoke(\n",
" {\n",
" \"input_language\": \"English\",\n",
" \"output_language\": \"German\",\n",
" \"input\": \"I love programming.\",\n",
" }\n",
")"
]
},
{
"cell_type": "markdown",
"id": "0ec73a0e",
"metadata": {},
"source": [
"## Streaming"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "46fd21a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OH BOY! Let me tell you all about LIONS!\n",
"\n",
"Lions are the kings of the jungle! They're really big and have beautiful, fluffy manes around their necks. The mane is like a big, golden crown!\n",
"\n",
"Lions live in groups called prides. A pride is like a big family, and the lionesses (that's what we call the female lions) take care of the babies. The lionesses are like the mommies, and they teach the babies how to hunt and play.\n",
"\n",
"Lions are very good at hunting. They work together to catch their food, like zebras and antelopes. They're super fast and can run really, really fast!\n",
"\n",
"But lions are also very sleepy. They like to take long naps in the sun, and they can sleep for up to 20 hours a day! Can you imagine sleeping that much?\n",
"\n",
"Lions are also very loud. They roar really loudly to talk to each other. It's like they're saying, \"ROAR! I'm the king of the jungle!\"\n",
"\n",
"And guess what? Lions are very social. They like to play and cuddle with each other. They're like big, furry teddy bears!\n",
"\n",
"So, that's lions! Aren't they just the coolest?"
]
}
],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")\n",
"\n",
"system = \"You are an expert on animals who must answer questions in a manner that a 5 year old can understand.\"\n",
"human = \"I want to learn more about this animal: {animal}\"\n",
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
"\n",
"chain = prompt | llm\n",
"\n",
"for chunk in chain.stream({\"animal\": \"Lion\"}):\n",
" print(chunk.content, end=\"\", flush=True)"
]
},
{
"cell_type": "markdown",
"id": "f67b6132",
"metadata": {},
"source": [
"## Async"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "a3a45baf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='Ice', response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 36, 'total_tokens': 38}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_be27ec77ff', 'finish_reason': 'stop'}, id='run-7434bdde-1bec-44cf-827b-8d978071dfe8-0', usage_metadata={'input_tokens': 36, 'output_tokens': 2, 'total_tokens': 38})"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"human\",\n",
" \"Let's play a game of opposites. What's the opposite of {topic}? Just give me the answer with no extra input.\",\n",
" )\n",
" ]\n",
")\n",
"chain = prompt | llm\n",
"await chain.ainvoke({\"topic\": \"fire\"})"
]
},
{
"cell_type": "markdown",
"id": "4f9d9945",
"metadata": {},
"source": [
"## Async Streaming"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "c7448e0f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In the distant reaches of the cosmos, there existed a peculiar phenomenon known as the \"Eclipse of Eternity,\" a swirling vortex of darkness that had been shrouded in mystery for eons. It was said that this blackhole, born from the cataclysmic collision of two ancient stars, had been slowly devouring the fabric of space-time itself, warping the very essence of reality. As the celestial bodies of the galaxy danced around it, they began to notice a strange, almost imperceptible distortion in the fabric of space, as if the blackhole's gravitational pull was exerting an influence on the very course of events itself.\n",
"\n",
"As the centuries passed, astronomers from across the galaxy became increasingly fascinated by the Eclipse of Eternity, pouring over ancient texts and scouring the cosmos for any hint of its secrets. One such scholar, a brilliant and reclusive astrophysicist named Dr. Elara Vex, became obsessed with unraveling the mysteries of the blackhole. She spent years pouring over ancient texts, deciphering cryptic messages and hidden codes that hinted at the existence of a long-lost civilization that had once thrived in the heart of the blackhole itself. According to legend, this ancient civilization had possessed knowledge of the cosmos that was beyond human comprehension, and had used their mastery of the universe to create the Eclipse of Eternity as a gateway to other dimensions.\n",
"\n",
"As Dr. Vex delved deeper into her research, she began to experience strange and vivid dreams, visions that seemed to transport her to the very heart of the blackhole itself. In these dreams, she saw ancient beings, their faces twisted in agony as they were consumed by the void. She saw stars and galaxies, their light warped and distorted by the blackhole's gravitational pull. And she saw the Eclipse of Eternity itself, its swirling vortex of darkness pulsing with an otherworldly energy that seemed to be calling to her. As the dreams grew more vivid and more frequent, Dr. Vex became convinced that she was being drawn into the heart of the blackhole, and that the secrets of the universe lay waiting for her on the other side."
]
}
],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"human\",\n",
" \"Write a long convoluted story about {subject}. I want {num_paragraphs} paragraphs.\",\n",
" )\n",
" ]\n",
")\n",
"chain = prompt | llm\n",
"\n",
"async for chunk in chain.astream({\"num_paragraphs\": 3, \"subject\": \"blackholes\"}):\n",
" print(chunk.content, end=\"\", flush=True)"
]
},
{
"cell_type": "markdown",
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all ChatCerebras features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_cerebras.chat_models.ChatCerebras.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -404,6 +404,7 @@
" max_new_tokens=512,\n",
" do_sample=False,\n",
" repetition_penalty=1.03,\n",
" return_full_text=False,\n",
" ),\n",
" model_kwargs={\"quantization_config\": quantization_config},\n",
")\n",

View File

@@ -210,6 +210,13 @@
")\n",
"llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This same `HuggingFaceEndpoint` class can be used with a local [HuggingFace TGI instance](https://github.com/huggingface/text-generation-inference/blob/main/docs/source/index.md) serving the LLM. Check out the TGI [repository](https://github.com/huggingface/text-generation-inference/tree/main) for details on various hardware (GPU, TPU, Gaudi...) support."
]
}
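,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For example, a minimal sketch (the endpoint URL below is a placeholder for wherever your local TGI server is listening):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.llms import HuggingFaceEndpoint\n",
"\n",
"# Placeholder URL: point this at your local TGI server.\n",
"llm = HuggingFaceEndpoint(\n",
"    endpoint_url=\"http://localhost:8080/\",\n",
"    max_new_tokens=512,\n",
")\n",
"llm.invoke(\"What did foo say about bar?\")"
]
}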
],
"metadata": {

View File

@@ -6,16 +6,272 @@
"source": [
"# IPEX-LLM\n",
"\n",
"> [IPEX-LLM](https://github.com/intel-analytics/ipex-llm/) is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency. \n",
"> [IPEX-LLM](https://github.com/intel-analytics/ipex-llm) is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency.\n",
"\n",
"This example goes over how to use LangChain to interact with `ipex-llm` for text generation. \n"
"- [IPEX-LLM on Intel GPU](#ipex-llm-on-intel-gpu)\n",
"- [IPEX-LLM on Intel CPU](#ipex-llm-on-intel-cpu)\n",
"\n",
"## IPEX-LLM on Intel GPU\n",
"\n",
"This example goes over how to use LangChain to interact with `ipex-llm` for text generation on Intel GPU. \n",
"\n",
"> **Note**\n",
">\n",
"> It is recommended that only Windows users with Intel Arc A-Series GPU (except for Intel Arc A300-Series or Pro A60) run Jupyter notebook directly for section \"IPEX-LLM on Intel GPU\". For other cases (e.g. Linux users, Intel iGPU, etc.), it is recommended to run the code with Python scripts in terminal for best experiences.\n",
"\n",
"### Install Prerequisites\n",
"To benefit from IPEX-LLM on Intel GPUs, there are several prerequisite steps for tools installation and environment preparation.\n",
"\n",
"If you are a Windows user, visit the [Install IPEX-LLM on Windows with Intel GPU Guide](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_windows_gpu.md), and follow [Install Prerequisites](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_windows_gpu.md#install-prerequisites) to update GPU driver (optional) and install Conda.\n",
"\n",
"If you are a Linux user, visit the [Install IPEX-LLM on Linux with Intel GPU](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md), and follow [**Install Prerequisites**](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md#install-prerequisites) to install GPU driver, Intel® oneAPI Base Toolkit 2024.0, and Conda.\n",
"\n",
"### Setup\n",
"\n",
"After the prerequisites installation, you should have created a conda environment with all prerequisites installed. **Start the jupyter service in this conda environment**:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain langchain-community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
"Install IEPX-LLM for running LLMs locally on Intel GPU."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> **Note**\n",
">\n",
"> You can also use `https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/` as the extra-indel-url.\n",
"\n",
"### Runtime Configuration\n",
"\n",
"For optimal performance, it is recommended to set several environment variables based on your device:\n",
"\n",
"#### For Windows Users with Intel Core Ultra integrated GPU"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"SYCL_CACHE_PERSISTENT\"] = \"1\"\n",
"os.environ[\"BIGDL_LLM_XMX_DISABLED\"] = \"1\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### For Windows Users with Intel Arc A-Series GPU"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"SYCL_CACHE_PERSISTENT\"] = \"1\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> **Note**\n",
">\n",
"> For the first time that each model runs on Intel iGPU/Intel Arc A300-Series or Pro A60, it may take several minutes to compile.\n",
">\n",
"> For other GPU type, please refer to [here](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Overview/install_gpu.md#runtime-configuration) for Windows users, and [here](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Overview/install_gpu.md#runtime-configuration-1) for Linux users.\n",
"\n",
"\n",
"### Basic Usage\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"\n",
"from langchain.chains import LLMChain\n",
"from langchain_community.llms import IpexLLM\n",
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"warnings.filterwarnings(\"ignore\", category=UserWarning, message=\".*padding_mask.*\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specify the prompt template for your model. In this example, we use the [vicuna-1.5](https://huggingface.co/lmsys/vicuna-7b-v1.5) model. If you're working with a different model, choose a proper template accordingly."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"template = \"USER: {question}\\nASSISTANT:\"\n",
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the model locally using IpexLLM using `IpexLLM.from_model_id`. It will load the model directly in its Huggingface format and convert it automatically to low-bit format for inference. Set `device` to `\"xpu\"` in `model_kwargs` when initializing IpexLLM in order to load the LLM model to Intel GPU."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm = IpexLLM.from_model_id(\n",
" model_id=\"lmsys/vicuna-7b-v1.5\",\n",
" model_kwargs={\n",
" \"temperature\": 0,\n",
" \"max_length\": 64,\n",
" \"trust_remote_code\": True,\n",
" \"device\": \"xpu\",\n",
" },\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use it in Chains"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm_chain = prompt | llm\n",
"\n",
"question = \"What is AI?\"\n",
"output = llm_chain.invoke(question)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save/Load Low-bit Model\n",
"Alternatively, you might save the low-bit model to disk once and use `from_model_id_low_bit` instead of `from_model_id` to reload it for later use - even across different machines. It is space-efficient, as the low-bit model demands significantly less disk space than the original model. And `from_model_id_low_bit` is also more efficient than `from_model_id` in terms of speed and memory usage, as it skips the model conversion step. You can similarly set `device` to `\"xpu\"` in `model_kwargs` in order to load the LLM model to Intel GPU. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To save the low-bit model, use `save_low_bit` as follows."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"saved_lowbit_model_path = \"./vicuna-7b-1.5-low-bit\" # path to save low-bit model\n",
"llm.model.save_low_bit(saved_lowbit_model_path)\n",
"del llm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the model from saved lowbit model path as follows. \n",
"> Note that the saved path for the low-bit model only includes the model itself but not the tokenizers. If you wish to have everything in one place, you will need to manually download or copy the tokenizer files from the original model's directory to the location where the low-bit model is saved."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm_lowbit = IpexLLM.from_model_id_low_bit(\n",
" model_id=saved_lowbit_model_path,\n",
" tokenizer_id=\"lmsys/vicuna-7b-v1.5\",\n",
" # tokenizer_name=saved_lowbit_model_path, # copy the tokenizers to saved path if you want to use it this way\n",
" model_kwargs={\n",
" \"temperature\": 0,\n",
" \"max_length\": 64,\n",
" \"trust_remote_code\": True,\n",
" \"device\": \"xpu\",\n",
" },\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use the loaded model in Chains:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm_chain = prompt | llm_lowbit\n",
"\n",
"\n",
"question = \"What is AI?\"\n",
"output = llm_chain.invoke(question)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## IPEX-LLM on Intel CPU\n",
"\n",
"This example goes over how to use LangChain to interact with `ipex-llm` for text generation on Intel CPU.\n",
"\n",
"### Setup"
]
},
{
@@ -33,7 +289,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Install IEPX-LLM for running LLMs locally on Intel CPU."
"Install IEPX-LLM for running LLMs locally on Intel CPU:\n",
"\n",
"#### For Windows users:"
]
},
{
@@ -49,7 +307,23 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Basic Usage"
"#### For Linux users:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Basic Usage"
]
},
{
@@ -126,15 +400,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Save/Load Low-bit Model\n",
"Alternatively, you might save the low-bit model to disk once and use `from_model_id_low_bit` instead of `from_model_id` to reload it for later use - even across different machines. It is space-efficient, as the low-bit model demands significantly less disk space than the original model. And `from_model_id_low_bit` is also more efficient than `from_model_id` in terms of speed and memory usage, as it skips the model conversion step."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To save the low-bit model, use `save_low_bit` as follows."
"### Save/Load Low-bit Model\n",
"\n",
"Alternatively, you might save the low-bit model to disk once and use `from_model_id_low_bit` instead of `from_model_id` to reload it for later use - even across different machines. It is space-efficient, as the low-bit model demands significantly less disk space than the original model. And `from_model_id_low_bit` is also more efficient than `from_model_id` in terms of speed and memory usage, as it skips the model conversion step.\n",
"\n",
"To save the low-bit model, use `save_low_bit` as follows:"
]
},
{
@@ -152,7 +422,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the model from saved lowbit model path as follows. \n",
"Load the model from saved lowbit model path as follows.\n",
"\n",
"> Note that the saved path for the low-bit model only includes the model itself but not the tokenizers. If you wish to have everything in one place, you will need to manually download or copy the tokenizer files from the original model's directory to the location where the low-bit model is saved."
]
},
@@ -192,22 +463,8 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"name": "python"
}
},
"nbformat": 4,

View File

@@ -42,18 +42,10 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "efcdb2b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Enter your OpenAI API key: ········\n"
]
}
],
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
@@ -72,7 +64,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "52fa46e8",
"metadata": {},
"outputs": [],
@@ -122,7 +114,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "6fb585dd",
"metadata": {
"tags": []
@@ -144,17 +136,17 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"id": "85b49da0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"\\n\\nI'm an AI language model created by OpenAI, so I don't have feelings or emotions. But thank you for asking! How can I assist you today?\""
"'\\n\\nI am an AI and do not have emotions like humans do, so I am always functioning at my optimal level. Thank you for asking! How can I assist you today?'"
]
},
"execution_count": 5,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -173,16 +165,27 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "a641dbd9",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'\\nIch liebe Programmieren.'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"prompt = PromptTemplate(\"How to say {input} in {output_language}:\\n\")\n",
"prompt = PromptTemplate.from_template(\"How to say {input} in {output_language}:\\n\")\n",
"\n",
"chain = prompt | llm\n",
"chain.invoke(\n",
@@ -205,7 +208,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "55142cec",
"metadata": {},
"outputs": [],
@@ -221,8 +224,8 @@
]
},
{
"cell_type": "markdown",
"id": "73e207dd",
"cell_type": "raw",
"id": "2fd99e97-013f-4c28-bb47-426faa42a2cf",
"metadata": {},
"source": [
"## API reference\n",
@@ -247,7 +250,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.4"
},
"vscode": {
"interpreter": {

View File

@@ -70,6 +70,10 @@
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"e2e_nlg\",\n",
" adapter_version=1,\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -87,6 +91,10 @@
" predibase_api_key=os.environ.get(\"PREDIBASE_API_TOKEN\"),\n",
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"predibase/e2e_nlg\",\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -96,7 +104,11 @@
"metadata": {},
"outputs": [],
"source": [
"response = model.invoke(\"Can you recommend me a nice dry wine?\")\n",
"# Optionally use `kwargs` to dynamically overwrite \"generate()\" settings.\n",
"response = model.invoke(\n",
" \"Can you recommend me a nice dry wine?\",\n",
" **{\"temperature\": 0.5, \"max_new_tokens\": 1024},\n",
")\n",
"print(response)"
]
},
@@ -127,6 +139,10 @@
" model=\"mistral-7b\",\n",
" predibase_api_key=os.environ.get(\"PREDIBASE_API_TOKEN\"),\n",
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -147,6 +163,10 @@
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"e2e_nlg\",\n",
" adapter_version=1,\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -162,6 +182,10 @@
" predibase_api_key=os.environ.get(\"PREDIBASE_API_TOKEN\"),\n",
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"predibase/e2e_nlg\",\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -259,6 +283,10 @@
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"my-finetuned-adapter-id\", # Supports both, Predibase-hosted and HuggingFace-hosted adapter repositories.\n",
" adapter_version=1, # required for Predibase-hosted adapters (ignored for HuggingFace-hosted adapters)\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")\n",
"# replace my-base-LLM with the name of your choice of a serverless base model in Predibase"
]
@@ -269,7 +297,8 @@
"metadata": {},
"outputs": [],
"source": [
"# response = model.invoke(\"Can you help categorize the following emails into positive, negative, and neutral?\")"
"# Optionally use `kwargs` to dynamically overwrite \"generate()\" settings.\n",
"# response = model.invoke(\"Can you help categorize the following emails into positive, negative, and neutral?\", **{\"temperature\": 0.5, \"max_new_tokens\": 1024})"
]
}
],

View File

@@ -26,6 +26,7 @@ from langchain_anthropic import ChatAnthropic
model = ChatAnthropic(model='claude-3-opus-20240229')
```
## LLMs
### [Legacy] AnthropicLLM

View File

@@ -204,7 +204,7 @@ AWS offers services for computing, databases, storage, analytics, and other func
See a [usage example](/docs/integrations/vectorstores/documentdb).
```python
from langchain.vectorstores import DocumentDBVectorSearch
from langchain_community.vectorstores import DocumentDBVectorSearch
```
### Amazon MemoryDB
[Amazon MemoryDB](https://aws.amazon.com/memorydb/) is a durable, in-memory database service that delivers ultra-fast performance. MemoryDB is compatible with Redis OSS, a popular open source data store,
@@ -305,7 +305,7 @@ pip install boto3
See a [usage example](/docs/integrations/memory/aws_dynamodb).
```python
from langchain.memory import DynamoDBChatMessageHistory
from langchain_community.chat_message_histories import DynamoDBChatMessageHistory
```
## Graphs
@@ -333,6 +333,12 @@ from langchain_community.chains.graph_qa.neptune_sparql import NeptuneSparqlQACh
## Callbacks
### Bedrock token usage
```python
from langchain_community.callbacks.bedrock_anthropic_callback import BedrockAnthropicTokenUsageCallbackHandler
```
### SageMaker Tracking
>[Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a fully managed service that is used to quickly
@@ -351,7 +357,7 @@ pip install google-search-results sagemaker
See a [usage example](/docs/integrations/callbacks/sagemaker_tracking).
```python
from langchain.callbacks import SageMakerCallbackHandler
from langchain_community.callbacks import SageMakerCallbackHandler
```
## Chains

View File

@@ -58,7 +58,7 @@ The value of image_url can be any of the following:
### Vertex AI
Access PaLM chat models like `chat-bison` and `codechat-bison` via Google Cloud.
Access chat models like `Gemini` via Google Cloud.
We need to install `langchain-google-vertexai` python package.
@@ -72,6 +72,122 @@ See a [usage example](/docs/integrations/chat/google_vertex_ai_palm).
from langchain_google_vertexai import ChatVertexAI
```
### Chat Anthropic on Vertex AI Model Garden
See a [usage example](/docs/integrations/llms/google_vertex_ai_palm).
```python
from langchain_google_vertexai.model_garden import ChatAnthropicVertex
```
### Chat Llama on Vertex AI Model Garden
```python
from langchain_google_vertexai.model_garden_maas.llama import VertexModelGardenLlama
```
### Chat Mistral on Vertex AI Model Garden
```python
from langchain_google_vertexai.model_garden_maas.mistral import VertexModelGardenMistral
```
### Chat Gemma local from Hugging Face
>Local `Gemma` model loaded from `HuggingFace`.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaChatLocalHF
```
### Chat Gemma local from Kaggle
>Local `Gemma` model loaded from `Kaggle`.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaChatLocalKaggle
```
### Chat Gemma on Vertex AI Model Garden
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaChatVertexAIModelGarden
```
### Vertex AI image captioning chat
>Implementation of the `Image Captioning model` as a chat.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIImageCaptioningChat
```
### Vertex AI image editor chat
>Given an image and a prompt, edit the image. Currently only supports mask-free editing.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIImageEditorChat
```
### Vertex AI image generator chat
>Generates an image from a prompt.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIImageGeneratorChat
```
### Vertex AI visual QnA chat
>Chat implementation of a visual QnA model
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIVisualQnAChat
```
## LLMs
### Google Generative AI
@@ -106,9 +222,63 @@ See a [usage example](/docs/integrations/llms/google_vertex_ai_palm#vertex-model
from langchain_google_vertexai import VertexAIModelGarden
```
### Gemma local from Hugging Face
>Local `Gemma` model loaded from `HuggingFace`.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaLocalHF
```
### Gemma local from Kaggle
>Local `Gemma` model loaded from `Kaggle`.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaLocalKaggle
```
### Gemma on Vertex AI Model Garden
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaVertexAIModelGarden
```
### Vertex AI image captioning
>Implementation of the `Image Captioning model` as an LLM.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIImageCaptioning
```
## Embedding models
### Google Generative AI Embeddings
### Google Generative AI embedding
See a [usage example](/docs/integrations/text_embedding/google_generative_ai).
@@ -126,6 +296,18 @@ export GOOGLE_API_KEY=your-api-key
from langchain_google_genai import GoogleGenerativeAIEmbeddings
```
### Google Generative AI server-side embedding
Install the python package:
```bash
pip install langchain-google-genai
```
```python
from langchain_google_genai.google_vector_store import ServerSideEmbedding
```
### Vertex AI
We need to install `langchain-google-vertexai` python package.
@@ -140,7 +322,7 @@ See a [usage example](/docs/integrations/text_embedding/google_vertex_ai_palm).
from langchain_google_vertexai import VertexAIEmbeddings
```
### Palm Embedding
### Palm embedding
We need to install `langchain-community` python package.
@@ -189,6 +371,7 @@ from langchain_google_community import BigQueryLoader
### Bigtable
> [Google Cloud Bigtable](https://cloud.google.com/bigtable/docs) is Google's fully managed NoSQL Big Data database service in Google Cloud.
Install the python package:
```bash
@@ -204,6 +387,7 @@ from langchain_google_bigtable import BigtableLoader
### Cloud SQL for MySQL
> [Google Cloud SQL for MySQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your MySQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -213,12 +397,13 @@ pip install langchain-google-cloud-sql-mysql
See [usage example](/docs/integrations/document_loaders/google_cloud_sql_mysql).
```python
from langchain_google_cloud_sql_mysql import MySQLEngine, MySQLDocumentLoader
from langchain_google_cloud_sql_mysql import MySQLEngine, MySQLLoader
```
### Cloud SQL for SQL Server
> [Google Cloud SQL for SQL Server](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your SQL Server databases on Google Cloud.
Install the python package:
```bash
@@ -234,6 +419,7 @@ from langchain_google_cloud_sql_mssql import MSSQLEngine, MSSQLLoader
### Cloud SQL for PostgreSQL
> [Google Cloud SQL for PostgreSQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your PostgreSQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -318,6 +504,7 @@ from langchain_google_community import GoogleDriveLoader
### Firestore (Native Mode)
> [Google Cloud Firestore](https://cloud.google.com/firestore/docs/) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
Install the python package:
```bash
@@ -334,6 +521,7 @@ from langchain_google_firestore import FirestoreLoader
> [Google Cloud Firestore in Datastore mode](https://cloud.google.com/datastore/docs) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
> Firestore is the newest version of Datastore and introduces several improvements over Datastore.
Install the python package:
```bash
@@ -349,6 +537,7 @@ from langchain_google_datastore import DatastoreLoader
### Memorystore for Redis
> [Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) is a fully managed Redis service for Google Cloud. Applications running on Google Cloud can achieve extreme performance by leveraging the highly scalable, available, secure Redis service without the burden of managing complex Redis deployments.
Install the python package:
```bash
@@ -358,12 +547,13 @@ pip install langchain-google-memorystore-redis
See [usage example](/docs/integrations/document_loaders/google_memorystore_redis).
```python
from langchain_google_memorystore_redis import MemorystoreLoader
from langchain_google_memorystore_redis import MemorystoreDocumentLoader
```
### Spanner
> [Google Cloud Spanner](https://cloud.google.com/spanner/docs) is a fully managed, mission-critical, relational database service on Google Cloud that offers transactional consistency at global scale, automatic, synchronous replication for high availability, and support for two SQL dialects: GoogleSQL (ANSI 2011 with extensions) and PostgreSQL.
Install the python package:
```bash
@@ -482,6 +672,7 @@ from langchain.vectorstores import BigQueryVectorSearch
### Memorystore for Redis
> [Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) is a fully managed Redis service for Google Cloud. Applications running on Google Cloud can achieve extreme performance by leveraging the highly scalable, available, secure Redis service without the burden of managing complex Redis deployments.
Install the python package:
```bash
@@ -497,6 +688,7 @@ from langchain_google_memorystore_redis import RedisVectorStore
### Spanner
> [Google Cloud Spanner](https://cloud.google.com/spanner/docs) is a fully managed, mission-critical, relational database service on Google Cloud that offers transactional consistency at global scale, automatic, synchronous replication for high availability, and support for two SQL dialects: GoogleSQL (ANSI 2011 with extensions) and PostgreSQL.
Install the python package:
```bash
@@ -512,6 +704,7 @@ from langchain_google_spanner import SpannerVectorStore
### Firestore (Native Mode)
> [Google Cloud Firestore](https://cloud.google.com/firestore/docs/) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
Install the python package:
```bash
@@ -521,12 +714,13 @@ pip install langchain-google-firestore
See [usage example](/docs/integrations/vectorstores/google_firestore).
```python
from langchain_google_firestore import FirestoreVectorstore
from langchain_google_firestore import FirestoreVectorStore
```
### Cloud SQL for MySQL
> [Google Cloud SQL for MySQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your MySQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -542,6 +736,7 @@ from langchain_google_cloud_sql_mysql import MySQLEngine, MySQLVectorStore
### Cloud SQL for PostgreSQL
> [Google Cloud SQL for PostgreSQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your PostgreSQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -573,6 +768,52 @@ See a [usage example](/docs/integrations/vectorstores/google_vertex_ai_vector_se
from langchain_google_vertexai import VectorSearchVectorStore
```
### Vertex AI Vector Search with DataStore
> Vector Search with Datastore document storage.
Install the python package:
```bash
pip install langchain-google-vertexai
```
See a [usage example](/docs/integrations/vectorstores/google_vertex_ai_vector_search/#optional--you-can-also-create-vectore-and-store-chunks-in-a-datastore).
```python
from langchain_google_vertexai import VectorSearchVectorStoreDatastore
```
### VectorSearchVectorStoreGCS
> Alias of `VectorSearchVectorStore` for consistency
> with the rest of the vector stores, which use different document storage backends.
Install the python package:
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai import VectorSearchVectorStoreGCS
```
### Google Generative AI Vector Store
> Currently, it computes the embedding vectors on the server side.
> For more information visit [Guide](https://developers.generativeai.google/guide).
Install the python package:
```bash
pip install langchain-google-genai
```
```python
from langchain_google_genai.google_vector_store import GoogleVectorStore
```
### ScaNN
>[Google ScaNN](https://github.com/google-research/google-research/tree/master/scann)
@@ -605,7 +846,7 @@ from langchain_community.vectorstores import ScaNN
We need to install several python packages.
```bash
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib langchain-googledrive
```
See a [usage example and authorization instructions](/docs/integrations/retrievers/google_drive).
@@ -619,16 +860,38 @@ from langchain_googledrive.retrievers import GoogleDriveRetriever
> [Vertex AI Search](https://cloud.google.com/generative-ai-app-builder/docs/introduction)
> from Google Cloud allows developers to quickly build generative AI powered search engines for customers and employees.
See a [usage example](/docs/integrations/retrievers/google_vertex_ai_search).
Note: `GoogleVertexAISearchRetriever` is deprecated, use `VertexAIMultiTurnSearchRetriever`,
`VertexAISearchSummaryTool`, and `VertexAISearchRetriever` (see below).
#### GoogleVertexAISearchRetriever
We need to install the `google-cloud-discoveryengine` python package.
```bash
pip install google-cloud-discoveryengine
```
See a [usage example](/docs/integrations/retrievers/google_vertex_ai_search).
```python
from langchain_community.retrievers import GoogleVertexAISearchRetriever
```
#### VertexAIMultiTurnSearchRetriever
```python
from langchain.retrievers import GoogleVertexAISearchRetriever
from langchain_google_community import VertexAIMultiTurnSearchRetriever
```
#### VertexAISearchRetriever
```python
from langchain_google_community import VertexAISearchRetriever
```
#### VertexAISearchSummaryTool
```python
from langchain_google_community import VertexAISearchSummaryTool
```
### Document AI Warehouse
@@ -662,10 +925,10 @@ from langchain_google_community.documentai_warehouse import DocumentAIWarehouseR
> It applies DeepMind's groundbreaking research in WaveNet and Google's powerful neural networks
> to deliver the highest fidelity possible.
We need to install a python package.
We need to install python packages.
```bash
pip install google-cloud-text-to-speech
pip install google-cloud-text-to-speech langchain-google-community
```
See a [usage example and authorization instructions](/docs/integrations/tools/google_cloud_texttospeech).
@@ -680,13 +943,14 @@ We need to install several python packages.
```bash
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
pip install langchain-googledrive
```
See a [usage example and authorization instructions](/docs/integrations/tools/google_drive).
```python
from langchain_community.utilities.google_drive import GoogleDriveAPIWrapper
from langchain_community.tools.google_drive.tool import GoogleDriveSearchTool
from langchain_googledrive.utilities.google_drive import GoogleDriveAPIWrapper
from langchain_googledrive.tools.google_drive.tool import GoogleDriveSearchTool
```
### Google Finance
@@ -776,6 +1040,23 @@ from langchain.agents import load_tools
tools = load_tools(["google-search"])
```
#### GoogleSearchResults
Tool that queries the `Google Search` API (via `GoogleSearchAPIWrapper`) and gets back JSON.
```python
from langchain_community.tools import GoogleSearchResults
```
#### GoogleSearchRun
Tool that queries the `Google Search` API (via `GoogleSearchAPIWrapper`).
```python
from langchain_community.tools import GoogleSearchRun
```
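As a minimal sketch (assuming `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` are set in the environment), both tools are constructed from the same API wrapper:
```python
from langchain_community.tools import GoogleSearchResults, GoogleSearchRun
from langchain_community.utilities import GoogleSearchAPIWrapper

# The wrapper reads GOOGLE_API_KEY and GOOGLE_CSE_ID from the environment.
api_wrapper = GoogleSearchAPIWrapper()

run_tool = GoogleSearchRun(api_wrapper=api_wrapper)  # returns a text summary
results_tool = GoogleSearchResults(api_wrapper=api_wrapper)  # returns JSON results

print(run_tool.invoke("LangChain"))
```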
### Google Trends
We need to install a python package.
@@ -810,6 +1091,18 @@ See a [usage example and authorization instructions](/docs/integrations/tools/gm
from langchain_google_community import GmailToolkit
```
#### GMail individual tools
You can use individual tools from GMail Toolkit.
```python
from langchain_google_community.gmail.create_draft import GmailCreateDraft
from langchain_google_community.gmail.get_message import GmailGetMessage
from langchain_google_community.gmail.get_thread import GmailGetThread
from langchain_google_community.gmail.search import GmailSearch
from langchain_google_community.gmail.send_message import GmailSendMessage
```
## Memory
### AlloyDB for PostgreSQL
@@ -831,6 +1124,7 @@ from langchain_google_alloydb_pg import AlloyDBEngine, AlloyDBChatMessageHistory
### Cloud SQL for PostgreSQL
> [Cloud SQL for PostgreSQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your PostgreSQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -847,6 +1141,7 @@ from langchain_google_cloud_sql_pg import PostgresEngine, PostgresChatMessageHis
### Cloud SQL for MySQL
> [Cloud SQL for MySQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your MySQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -862,6 +1157,7 @@ from langchain_google_cloud_sql_mysql import MySQLEngine, MySQLChatMessageHistor
### Cloud SQL for SQL Server
> [Cloud SQL for SQL Server](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your SQL Server databases on Google Cloud.
Install the python package:
```bash
@@ -877,6 +1173,7 @@ from langchain_google_cloud_sql_mssql import MSSQLEngine, MSSQLChatMessageHistor
### Spanner
> [Google Cloud Spanner](https://cloud.google.com/spanner/docs) is a fully managed, mission-critical, relational database service on Google Cloud that offers transactional consistency at global scale, automatic, synchronous replication for high availability, and support for two SQL dialects: GoogleSQL (ANSI 2011 with extensions) and PostgreSQL.
Install the python package:
```bash
@@ -892,6 +1189,7 @@ from langchain_google_spanner import SpannerChatMessageHistory
### Memorystore for Redis
> [Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) is a fully managed Redis service for Google Cloud. Applications running on Google Cloud can achieve extreme performance by leveraging the highly scalable, available, secure Redis service without the burden of managing complex Redis deployments.
Install the python package:
```bash
@@ -907,6 +1205,7 @@ from langchain_google_memorystore_redis import MemorystoreChatMessageHistory
### Bigtable
> [Google Cloud Bigtable](https://cloud.google.com/bigtable/docs) is Google's fully managed NoSQL Big Data database service in Google Cloud.
Install the python package:
```bash
@@ -922,6 +1221,7 @@ from langchain_google_bigtable import BigtableChatMessageHistory
### Firestore (Native Mode)
> [Google Cloud Firestore](https://cloud.google.com/firestore/docs/) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
Install the python package:
```bash
@@ -938,6 +1238,7 @@ from langchain_google_firestore import FirestoreChatMessageHistory
> [Google Cloud Firestore in Datastore mode](https://cloud.google.com/datastore/docs) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
> Firestore is the newest version of Datastore and introduces several improvements over Datastore.
Install the python package:
```bash
@@ -966,6 +1267,22 @@ See [usage example](/docs/integrations/memory/google_el_carro).
from langchain_google_el_carro import ElCarroChatMessageHistory
```
## Callbacks
### Vertex AI callback handler
>Callback Handler that tracks `VertexAI` info.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.callbacks import VertexAICallbackHandler
```
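A minimal sketch of attaching the handler; the model name and the usage attributes read at the end are assumptions for illustration:
```python
from langchain_google_vertexai import VertexAI
from langchain_google_vertexai.callbacks import VertexAICallbackHandler

handler = VertexAICallbackHandler()
# Model name is an example; any VertexAI model should work.
llm = VertexAI(model_name="gemini-1.5-flash", callbacks=[handler])

llm.invoke("Tell me a one-line joke.")
# The handler accumulates usage across calls (attribute names assumed).
print(handler.prompt_tokens, handler.completion_tokens)
```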
## Chat Loaders
### GMail
@@ -985,6 +1302,30 @@ See a [usage example and authorization instructions](/docs/integrations/chat_loa
from langchain_google_community import GMailLoader
```
## Evaluators
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
### VertexPairWiseStringEvaluator
>Pair-wise evaluation of the perplexity of a predicted string.
```python
from langchain_google_vertexai.evaluators.evaluation import VertexPairWiseStringEvaluator
```
### VertexStringEvaluator
>Evaluate the perplexity of a predicted string.
```python
from langchain_google_vertexai.evaluators.evaluation import VertexStringEvaluator
```
## 3rd Party Integrations
### SearchApi

View File

@@ -54,7 +54,7 @@ from langchain_community.embeddings import HuggingFaceInstructEmbeddings
### HuggingFaceBgeEmbeddings
>[BGE models on the HuggingFace](https://huggingface.co/BAAI/bge-large-en) are [the best open-source embedding models](https://huggingface.co/spaces/mteb/leaderboard).
>[BGE models on the HuggingFace](https://huggingface.co/BAAI/bge-large-en-v1.5) are one of [the best open-source embedding models](https://huggingface.co/spaces/mteb/leaderboard).
>BGE model is created by the [Beijing Academy of Artificial Intelligence (BAAI)](https://en.wikipedia.org/wiki/Beijing_Academy_of_Artificial_Intelligence). `BAAI` is a private non-profit organization engaged in AI research and development.
See a [usage example](/docs/integrations/text_embedding/bge_huggingface).
@@ -86,10 +86,10 @@ from langchain_community.embeddings import HuggingFaceHubEmbeddings
### Hugging Face dataset
>[Hugging Face Hub](https://huggingface.co/docs/hub/index) is home to over 75,000
> [datasets](https://huggingface.co/docs/hub/index#datasets) in more than 100 languages
> that can be used for a broad range of tasks across NLP, Computer Vision, and Audio.
> They are used for a diverse range of tasks such as translation, automatic speech
> recognition, and image classification.
We need to install `datasets` python package.
@@ -110,7 +110,7 @@ from langchain_community.document_loaders.hugging_face_dataset import HuggingFac
### Hugging Face Hub Tools
>[Hugging Face Tools](https://huggingface.co/docs/transformers/v4.29.0/en/custom_tools)
> support text I/O and are loaded using the `load_huggingface_tool` function.
We need to install several python packages.

View File

@@ -32,5 +32,5 @@ from langchain_community.document_loaders import ArxivLoader
See a [usage example](/docs/integrations/retrievers/arxiv).
```python
from langchain.retrievers import ArxivRetriever
from langchain_community.retrievers import ArxivRetriever
```

View File

@@ -24,6 +24,7 @@ from langchain_community.llms import QianfanLLMEndpoint
### Qianfan Chat Endpoint
See a [usage example](/docs/integrations/chat/baidu_qianfan_endpoint).
See another [usage example](/docs/integrations/chat/ernie).
```python
from langchain_community.chat_models import QianfanChatEndpoint
@@ -34,11 +35,26 @@ from langchain_community.chat_models import QianfanChatEndpoint
### Baidu Qianfan
See a [usage example](/docs/integrations/text_embedding/baidu_qianfan_endpoint).
See another [usage example](/docs/integrations/text_embedding/ernie).
```python
from langchain_community.embeddings import QianfanEmbeddingsEndpoint
```
## Document loaders
### Baidu BOS Directory Loader
```python
from langchain_community.document_loaders.baiducloud_bos_directory import BaiduBOSDirectoryLoader
```
### Baidu BOS File Loader
```python
from langchain_community.document_loaders.baiducloud_bos_file import BaiduBOSFileLoader
```
## Vector stores
### Baidu Cloud ElasticSearch VectorSearch

View File

@@ -0,0 +1,18 @@
# bookend.ai
LangChain implements an integration with embeddings provided by [bookend.ai](https://bookend.ai/).
## Installation and Setup
You need to register and get the `API_KEY`
from the [bookend.ai](https://bookend.ai/) website.
## Embedding model
See a [usage example](/docs/integrations/text_embedding/bookend).
```python
from langchain_community.embeddings import BookendEmbeddings
```

View File

@@ -83,3 +83,28 @@ from langchain_community.agent_toolkits.cassandra_database.toolkit import (
Learn more in the [example notebook](/docs/integrations/tools/cassandra_database).
Cassandra Database individual tools:
### Get Schema
Tool for getting the schema of a keyspace in an Apache Cassandra database.
```python
from langchain_community.tools import GetSchemaCassandraDatabaseTool
```
### Get Table Data
Tool for getting data from a table in an Apache Cassandra database.
```python
from langchain_community.tools import GetTableDataCassandraDatabaseTool
```
### Query
Tool for querying an Apache Cassandra database with provided CQL.
```python
from langchain_community.tools import QueryCassandraDatabaseTool
```

View File

@@ -0,0 +1,30 @@
# Cerebras
At Cerebras, we've developed the world's largest and fastest AI processor, the Wafer-Scale Engine-3 (WSE-3). The Cerebras CS-3 system, powered by the WSE-3, represents a new class of AI supercomputer that sets the standard for generative AI training and inference with unparalleled performance and scalability.
With Cerebras as your inference provider, you can:
- Achieve unprecedented speed for AI inference workloads
- Build commercially with high throughput
- Effortlessly scale your AI workloads with our seamless clustering technology
Our CS-3 systems can be quickly and easily clustered to create the largest AI supercomputers in the world, making it simple to place and run the largest models. Leading corporations, research institutions, and governments are already using Cerebras solutions to develop proprietary models and train popular open-source models.
Want to experience the power of Cerebras? Check out our [website](https://cerebras.ai) for more resources and explore options for accessing our technology through the Cerebras Cloud or on-premise deployments!
For more information about Cerebras Cloud, visit [cloud.cerebras.ai](https://cloud.cerebras.ai/). Our API reference is available at [inference-docs.cerebras.ai](https://inference-docs.cerebras.ai/).
## Installation and Setup
Install the integration package:
```bash
pip install langchain-cerebras
```
## API Key
Get an API Key from [cloud.cerebras.ai](https://cloud.cerebras.ai/) and add it to your environment variables:
```
export CEREBRAS_API_KEY="your-api-key-here"
```
## Chat Model
See a [usage example](/docs/integrations/chat/cerebras).
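A minimal sketch (assumes `CEREBRAS_API_KEY` is set; the model name is an example):
```python
from langchain_cerebras import ChatCerebras

llm = ChatCerebras(model="llama3.1-8b")  # example model name
llm.invoke("Why is fast inference important?")
```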

View File

@@ -0,0 +1,19 @@
# Coze
[Coze](https://www.coze.com/) is an AI chatbot development platform that enables
the creation and deployment of chatbots for handling diverse conversations across
various applications.
## Installation and Setup
First, you need to get the `API_KEY` from the [Coze](https://www.coze.com/) website.
## Chat models
See a [usage example](/docs/integrations/chat/coze/).
```python
from langchain_community.chat_models import ChatCoze
```

View File

@@ -0,0 +1,18 @@
# Dappier AI
> [Dappier](https://platform.dappier.com/) is a platform enabling access to diverse,
> real-time data models. Enhance your AI applications with `Dappier`'s pre-trained,
> LLM-ready data models and ensure accurate, current responses with reduced inaccuracies.
## Installation and Setup
To use one of the `Dappier AI` Data Models, you will need an API key. Visit
[Dappier Platform](https://platform.dappier.com/) to log in and create an API key in your profile.
## Chat models
See a [usage example](/docs/integrations/chat/dappier).
```python
from langchain_community.chat_models import ChatDappierAI
```

View File

@@ -11,13 +11,22 @@ Databricks embraces the LangChain ecosystem in various ways:
4. 🌐 **SQL Database** - [Databricks SQL](https://www.databricks.com/product/databricks-sql) is integrated with `SQLDatabase` in LangChain, allowing you to access the auto-optimizing, exceptionally performant data warehouse.
5. 💡 **Open Models** - Databricks open sources models, such as [DBRX](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm), which are available through the [Hugging Face Hub](https://huggingface.co/databricks/dbrx-instruct). These models can be directly utilized with LangChain, leveraging its integration with the `transformers` library.
Installation
------------
First-party Databricks integrations are available in the langchain-databricks partner package.
```
pip install langchain-databricks
```
Chat Model
----------
`ChatDatabricks` is a Chat Model class to access chat endpoints hosted on Databricks, including state-of-the-art models such as Llama3, Mixtral, and DBRX, as well as your own fine-tuned models.
```
from langchain_community.chat_models.databricks import ChatDatabricks
from langchain_databricks import ChatDatabricks
chat_model = ChatDatabricks(endpoint="databricks-meta-llama-3-70b-instruct")
```
@@ -29,6 +38,10 @@ LLM
`Databricks` is an LLM class to access completion endpoints hosted on Databricks.
:::caution
Text completion models have been deprecated and the latest and most popular models are [chat completion models](/docs/concepts/#chat-models). Use `ChatDatabricks` chat model instead to use those models and advanced features such as tool calling.
:::
```
from langchain_community.llms.databricks import Databricks
@@ -44,7 +57,7 @@ Embeddings
`DatabricksEmbeddings` is an Embeddings class to access text-embedding endpoints hosted on Databricks, including state-of-the-art models such as BGE, as well as your own fine-tuned models.
```
from langchain_community.embeddings import DatabricksEmbeddings
from langchain_databricks import DatabricksEmbeddings
embeddings = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
```
@@ -58,10 +71,15 @@ Vector Search
Databricks Vector Search is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from [Delta](https://docs.databricks.com/en/introduction/delta-comparison.html) tables managed by [Unity Catalog](https://www.databricks.com/product/unity-catalog) and query them with a simple API to return the most similar vectors.
```
from langchain_community.vectorstores import DatabricksVectorSearch
from langchain_databricks.vectorstores import DatabricksVectorSearch
dvs = DatabricksVectorSearch(
index, text_column="text", embedding=embeddings, columns=["source"]
endpoint="<YOUT_ENDPOINT_NAME>",
index_name="<YOUR_INDEX_NAME>",
index,
text_column="text",
embedding=embeddings,
columns=["source"]
)
docs = dvs.similarity_search("What is vector search?")
```

View File

@@ -7,7 +7,7 @@
"source": [
"# DSPy\n",
"\n",
"[DSPy](https://github.com/stanfordnlp/dspy) is a fantastic framework for LLMs that introduces an automatic compiler that teaches LMs how to conduct the declarative steps in your program. Specifically, the DSPy compiler will internally trace your program and then craft high-quality prompts for large LMs (or train automatic finetunes for small LMs) to teach them the steps of your task.\n",
">[DSPy](https://github.com/stanfordnlp/dspy) is a fantastic framework for LLMs that introduces an automatic compiler that teaches LMs how to conduct the declarative steps in your program. Specifically, the DSPy compiler will internally trace your program and then craft high-quality prompts for large LMs (or train automatic finetunes for small LMs) to teach them the steps of your task.\n",
"\n",
"Thanks to [Omar Khattab](https://twitter.com/lateinteraction) we have an integration! It works with any LCEL chains with some minor modifications.\n",
"\n",
@@ -17,6 +17,9 @@
"\n",
"Let's take a look at an example. In this example we will make a simple RAG pipeline. We will use DSPy to \"compile\" our program and learn an optimized prompt.\n",
"\n",
"This example uses the `ColBERTv2` model.\n",
"See the [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](https://arxiv.org/abs/2112.01488) paper.\n",
"\n",
"\n",
"## Install dependencies\n",
"\n",
@@ -1175,7 +1178,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,25 @@
# Embedchain
> [Embedchain](https://github.com/embedchain/embedchain) is a RAG framework to create
> data pipelines. It loads, indexes, retrieves and syncs all the data.
>
>It is available as an [open source package](https://github.com/embedchain/embedchain)
> and as a [hosted platform solution](https://app.embedchain.ai/).
## Installation and Setup
Install the package using pip:
```bash
pip install embedchain
```
## Retriever
See a [usage example](/docs/integrations/retrievers/embedchain).
```python
from langchain_community.retrievers import EmbedchainRetriever
```
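A minimal sketch of the retriever (assumes `OPENAI_API_KEY` is set for the default Embedchain app):
```python
from langchain_community.retrievers import EmbedchainRetriever

# create() builds a default Embedchain app under the hood.
retriever = EmbedchainRetriever.create()

# Index a source, then retrieve relevant documents.
retriever.add_texts(["https://en.wikipedia.org/wiki/Elon_Musk"])
docs = retriever.invoke("How many companies does Elon Musk run?")
```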

View File

@@ -0,0 +1,17 @@
# Everly AI
> [Everly AI](https://everlyai.xyz/) allows you to run your ML models at scale in the cloud.
> It also provides API access to [several LLM models](https://everlyai.xyz/).
## Installation and Setup
To use `Everly AI`, you will need an API key. Visit
[Everly AI](https://everlyai.xyz/) to create an API key in your profile.
## Chat models
See a [usage example](/docs/integrations/chat/everlyai).
```python
from langchain_community.chat_models import ChatEverlyAI
```

View File

@@ -4,9 +4,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exa Search\n",
"# Exa\n",
"\n",
"Exa's search integration exists in its own [partner package](https://pypi.org/project/langchain-exa/). You can install it with:"
">[Exa](https://exa.ai/) is a knowledge API for AI and developers.\n",
">\n",
"\n",
"## Installation and Setup\n",
"\n",
"`Exa` integration exists in its own [partner package](https://pypi.org/project/langchain-exa/). You can install it with:"
]
},
{
@@ -26,7 +31,9 @@
"\n",
"## Retriever\n",
"\n",
"You can use the [`ExaSearchRetriever`](/docs/integrations/tools/exa_search#using-exasearchretriever) in a standard retrieval pipeline. You can import it as follows"
"You can use the [`ExaSearchRetriever`](/docs/integrations/tools/exa_search#using-exasearchretriever) in a standard retrieval pipeline. You can import it as follows.\n",
"\n",
"See a [usage example](/docs/integrations/tools/exa_search).\n"
]
},
{
@@ -46,7 +53,40 @@
"source": [
"## Tools\n",
"\n",
"You can use Exa as an agent tool as described in the [Exa tool calling docs](/docs/integrations/tools/exa_search#using-the-exa-sdk-as-langchain-agent-tools).\n"
"You can use Exa as an agent tool as described in the [Exa tool calling docs](/docs/integrations/tools/exa_search#using-the-exa-sdk-as-langchain-agent-tools).\n",
"\n",
"See a [usage example](/docs/integrations/tools/exa_search).\n",
"\n",
"### ExaFindSimilarResults\n",
"\n",
"A tool that queries the Metaphor Search API and gets back JSON."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_exa.tools import ExaFindSimilarResults"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ExaSearchResults\n",
"\n",
"Exa Search tool."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_exa.tools import ExaSearchResults"
]
}
],
@@ -69,9 +109,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}

View File

@@ -0,0 +1,26 @@
# FalkorDB
>[FalkorDB](https://www.falkordb.com/) is the creator of [FalkorDB](https://docs.falkordb.com/),
> a low-latency graph database that delivers knowledge to GenAI.
## Installation and Setup
See [installation instructions here](/docs/integrations/graphs/falkordb/).
## Graphs
See a [usage example](/docs/integrations/graphs/falkordb).
```python
from langchain_community.graphs import FalkorDBGraph
```
## Chains
See a [usage example](/docs/integrations/graphs/falkordb).
```python
from langchain_community.chains.graph_qa.falkordb import FalkorDBQAChain
```

View File

@@ -0,0 +1,22 @@
# FireCrawl
>[FireCrawl](https://firecrawl.dev/?ref=langchain) crawls and converts any website into LLM-ready data.
> It crawls all accessible subpages and gives you clean markdown
> and metadata for each. No sitemap required.
## Installation and Setup
Install the python SDK:
```bash
pip install firecrawl-py
```
## Document loader
See a [usage example](/docs/integrations/document_loaders/firecrawl).
```python
from langchain_community.document_loaders import FireCrawlLoader
```
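A minimal sketch (the API key is a placeholder; it can also be supplied via the `FIRECRAWL_API_KEY` environment variable):
```python
from langchain_community.document_loaders import FireCrawlLoader

loader = FireCrawlLoader(
    url="https://firecrawl.dev",
    api_key="fc-...",  # placeholder
    mode="scrape",  # "scrape" loads a single page; "crawl" loads all subpages
)
docs = loader.load()
```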

View File

@@ -1,7 +1,9 @@
# Fireworks
# Fireworks AI
>[Fireworks AI](https://fireworks.ai) is a generative AI inference platform to run and
> customize models with industry-leading speed and production-readiness.
This page covers how to use [Fireworks](https://fireworks.ai/) models within
Langchain.
## Installation and setup
@@ -14,7 +16,7 @@ Langchain.
- Get a Fireworks API key by signing up at [fireworks.ai](https://fireworks.ai).
- Authenticate by setting the FIREWORKS_API_KEY environment variable.
## Authentication
### Authentication
There are two ways to authenticate using your Fireworks API key:
@@ -29,20 +31,26 @@ There are two ways to authenticate using your Fireworks API key:
```python
llm = Fireworks(api_key="<KEY>")
```
## Chat models
## Using the Fireworks LLM module
See a [usage example](/docs/integrations/chat/fireworks).
Fireworks integrates with Langchain through the LLM module. In this example, we
will work the mixtral-8x7b-instruct model.
```python
from langchain_fireworks import ChatFireworks
```
## LLMs
See a [usage example](/docs/integrations/llms/fireworks).
```python
from langchain_fireworks import Fireworks
llm = Fireworks(
api_key="<KEY>",
model="accounts/fireworks/models/mixtral-8x7b-instruct",
max_tokens=256)
llm("Name 3 sports.")
```
For a more detailed walkthrough, see [here](/docs/integrations/llms/Fireworks).
## Embedding models
See a [usage example](/docs/integrations/text_embedding/fireworks).
```python
from langchain_fireworks import FireworksEmbeddings
```
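A minimal sketch (assumes `FIREWORKS_API_KEY` is set; the model name is an example):
```python
from langchain_fireworks import FireworksEmbeddings

embeddings = FireworksEmbeddings(model="nomic-ai/nomic-embed-text-v1.5")
vector = embeddings.embed_query("Hello Fireworks!")
```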

View File

@@ -1,16 +1,19 @@
# ForefrontAI
# Forefront AI
> [Forefront AI](https://forefront.ai/) is a platform enabling you to
> fine-tune and inference open-source text generation models
This page covers how to use the ForefrontAI ecosystem within LangChain.
It is broken into two parts: installation and setup, and then references to specific ForefrontAI wrappers.
## Installation and Setup
- Get an ForefrontAI api key and set it as an environment variable (`FOREFRONTAI_API_KEY`)
## Wrappers
Get a `ForefrontAI` API key by
visiting [this page](https://accounts.forefront.ai/sign-in?redirect_url=https%3A%2F%2Fforefront.ai%2Fapp%2Fapi-keys)
and set it as an environment variable (`FOREFRONTAI_API_KEY`).
### LLM
## LLM
See a [usage example](/docs/integrations/llms/forefrontai).
There exists an ForefrontAI LLM wrapper, which you can access with
```python
from langchain_community.llms import ForefrontAI
```

View File

@@ -0,0 +1,31 @@
# Friendli AI
>[FriendliAI](https://friendli.ai/) enhances AI application performance and optimizes
> cost savings with scalable, efficient deployment options, tailored for high-demand AI workloads.
## Installation and setup
Install the `friendli-client` python package.
```bash
pip install friendli-client
```
Sign in to [Friendli Suite](https://suite.friendli.ai/) to create a Personal Access Token,
and set it as the `FRIENDLI_TOKEN` environment variable.
## Chat models
See a [usage example](/docs/integrations/chat/friendli).
```python
from langchain_community.chat_models.friendli import ChatFriendli
```
## LLMs
See a [usage example](/docs/integrations/llms/friendli).
```python
from langchain_community.llms.friendli import Friendli
```

View File

@@ -0,0 +1,32 @@
# Friendli AI
>[Friendli AI](https://friendli.ai/) is a company that fine-tunes, deploys LLMs,
> and serves a wide range of Generative AI use cases.
## Installation and setup
- Install the integration package:
```
pip install friendli-client
```
- Sign in to [Friendli Suite](https://suite.friendli.ai/) to create a Personal Access Token,
and set it as the `FRIENDLI_TOKEN` environment variable.
## Chat models
See a [usage example](/docs/integrations/chat/friendli).
```python
from langchain_community.chat_models.friendli import ChatFriendli
```
## LLMs
See a [usage example](/docs/integrations/llms/friendli).
```python
from langchain_community.llms.friendli import Friendli
```

View File

@@ -20,3 +20,26 @@ See a [usage example](/docs/integrations/document_loaders/github).
```python
from langchain_community.document_loaders import GitHubIssuesLoader, GithubFileLoader
```
## Tools/Toolkit
### GitHubToolkit
The `GitHub` toolkit contains tools that enable an LLM agent to interact
with a GitHub repository.
The toolkit is a wrapper for the `PyGitHub` library.
```python
from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit
```
Learn more in the [example notebook](/docs/integrations/tools/github).
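A minimal sketch of wiring the toolkit up (assumes the GitHub App credentials are set in the environment):
```python
from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit
from langchain_community.utilities.github import GitHubAPIWrapper

# The wrapper reads GITHUB_APP_ID, GITHUB_APP_PRIVATE_KEY and
# GITHUB_REPOSITORY from the environment.
github = GitHubAPIWrapper()
toolkit = GitHubToolkit.from_github_api_wrapper(github)
tools = toolkit.get_tools()
```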
### GitHubAction
Tool for interacting with the GitHub API.
```python
from langchain_community.tools.github.tool import GitHubAction
```

View File

@@ -0,0 +1,31 @@
# GitLab
>[GitLab Inc.](https://about.gitlab.com/) is an open-core company
> that operates `GitLab`, a DevOps software package that can develop,
> secure, and operate software. `GitLab` includes a distributed version
> control based on Git, including features such as access control, bug tracking,
> software feature requests, task management, and wikis for every project,
> as well as snippets.
## Tools/Toolkits
### GitLabToolkit
The `Gitlab` toolkit contains tools that enable an LLM agent to interact with a gitlab repository.
The toolkit is a wrapper for the `python-gitlab` library.
See a [usage example](/docs/integrations/tools/gitlab).
```python
from langchain_community.agent_toolkits.gitlab.toolkit import GitLabToolkit
```
### GitLabAction
Tool for interacting with the GitLab API.
```python
from langchain_community.tools.gitlab.tool import GitLabAction
```

View File

@@ -1,9 +1,13 @@
# GooseAI
This page covers how to use the GooseAI ecosystem within LangChain.
It is broken into two parts: installation and setup, and then references to specific GooseAI wrappers.
>[GooseAI](https://goose.ai) makes deploying NLP services easier and more accessible.
> `GooseAI` is a fully managed inference service delivered via API.
> With feature parity to other well known APIs, `GooseAI` delivers a plug-and-play solution
> for serving open source language models at the industry's best economics by simply
> changing 2 lines in your code.
## Installation and Setup
- Install the Python SDK with `pip install openai`
- Get your GooseAI api key from this link [here](https://goose.ai/).
- Set the environment variable (`GOOSEAI_API_KEY`).
@@ -13,11 +17,11 @@ import os
os.environ["GOOSEAI_API_KEY"] = "YOUR_API_KEY"
```
## Wrappers
### LLM
## LLMs
See a [usage example](/docs/integrations/llms/gooseai).
There exists an GooseAI LLM wrapper, which you can access with:
```python
from langchain_community.llms import GooseAI
```

View File

@@ -1,17 +1,20 @@
# Groq
Welcome to Groq! 🚀 At Groq, we've developed the world's first Language Processing Unit™, or LPU. The Groq LPU has a deterministic, single core streaming architecture that sets the standard for GenAI inference speed with predictable and repeatable performance for any given workload.
Beyond the architecture, our software is designed to empower developers like you with the tools you need to create innovative, powerful AI applications. With Groq as your engine, you can:
* Achieve uncompromised low latency and performance for real-time AI and HPC inferences 🔥
* Know the exact performance and compute time for any given workload 🔮
* Take advantage of our cutting-edge technology to stay ahead of the competition 💪
Want more Groq? Check out our [website](https://groq.com) for more resources and join our [Discord community](https://discord.gg/JvNsBDKeCG) to connect with our developers!
>[Groq](https://groq.com) developed the world's first Language Processing Unit™, or `LPU`.
> The `Groq LPU` has a deterministic, single core streaming architecture that sets the standard
> for GenAI inference speed with predictable and repeatable performance for any given workload.
>
>Beyond the architecture, `Groq` software is designed to empower developers like you with
> the tools you need to create innovative, powerful AI applications.
>
>With Groq as your engine, you can:
>* Achieve uncompromised low latency and performance for real-time AI and HPC inferences 🔥
>* Know the exact performance and compute time for any given workload 🔮
>* Take advantage of our cutting-edge technology to stay ahead of the competition 💪
## Installation and Setup
Install the integration package:
```bash
@@ -24,5 +27,10 @@ Request an [API key](https://wow.groq.com) and set it as an environment variable
export GROQ_API_KEY=gsk_...
```
## Chat Model
## Chat models
See a [usage example](/docs/integrations/chat/groq).
```python
from langchain_groq import ChatGroq
```
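A minimal sketch (assumes `GROQ_API_KEY` is set; the model name is an example):
```python
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama3-8b-8192", temperature=0)
llm.invoke("Explain the LPU architecture in one sentence.")
```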

View File

@@ -0,0 +1,37 @@
# LiteLLM
>[LiteLLM](https://docs.litellm.ai/docs/) is a library that simplifies calling Anthropic,
> Azure, Huggingface, Replicate, etc. LLMs in a unified way.
>
>You can use `LiteLLM` through either:
>
>* [LiteLLM Proxy Server](https://docs.litellm.ai/docs/#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects
>* [LiteLLM python SDK](https://docs.litellm.ai/docs/#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking
## Installation and setup
Install the `litellm` python package.
```bash
pip install litellm
```
## Chat models
### ChatLiteLLM
See a [usage example](/docs/integrations/chat/litellm).
```python
from langchain_community.chat_models import ChatLiteLLM
```
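A minimal sketch; LiteLLM routes by model string, so the corresponding provider key (here `OPENAI_API_KEY`) must be set:
```python
from langchain_community.chat_models import ChatLiteLLM

chat = ChatLiteLLM(model="gpt-3.5-turbo")
chat.invoke("Hi!")
```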
### ChatLiteLLMRouter
You also can use the `ChatLiteLLMRouter` to route requests to different LLMs or LLM providers.
See a [usage example](/docs/integrations/chat/litellm_router).
```python
from langchain_community.chat_models import ChatLiteLLMRouter
```

View File

@@ -21,9 +21,24 @@ model = Predibase(
model="mistral-7b",
predibase_api_key=os.environ.get("PREDIBASE_API_TOKEN"),
predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)
"""
Optionally use `model_kwargs` to set new default "generate()" settings. For example:
{
"api_token": os.environ.get("HUGGING_FACE_HUB_TOKEN"),
"max_new_tokens": 5, # default is 256
}
"""
**model_kwargs,
)
response = model.invoke("Can you recommend me a nice dry wine?")
"""
Optionally use `kwargs` to dynamically overwrite "generate()" settings. For example:
{
"temperature": 0.5, # default is the value in model_kwargs or 0.1 (initialization default)
"max_new_tokens": 1024, # default is the value in model_kwargs or 256 (initialization default)
}
"""
response = model.invoke("Can you recommend me a nice dry wine?", **kwargs)
print(response)
```
@@ -42,9 +57,24 @@ model = Predibase(
predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)
adapter_id="e2e_nlg",
adapter_version=1,
"""
Optionally use `model_kwargs` to set new default "generate()" settings. For example:
{
"api_token": os.environ.get("HUGGING_FACE_HUB_TOKEN"),
"max_new_tokens": 5, # default is 256
}
"""
**model_kwargs,
)
response = model.invoke("Can you recommend me a nice dry wine?")
"""
Optionally use `kwargs` to dynamically overwrite "generate()" settings. For example:
{
"temperature": 0.5, # default is the value in model_kwargs or 0.1 (initialization default)
"max_new_tokens": 1024, # default is the value in model_kwargs or 256 (initialization default)
}
"""
response = model.invoke("Can you recommend me a nice dry wine?", **kwargs)
print(response)
```
@@ -62,8 +92,23 @@ model = Predibase(
predibase_api_key=os.environ.get("PREDIBASE_API_TOKEN"),
predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)
adapter_id="predibase/e2e_nlg",
"""
Optionally use `model_kwargs` to set new default "generate()" settings. For example:
{
"api_token": os.environ.get("HUGGING_FACE_HUB_TOKEN"),
"max_new_tokens": 5, # default is 256
}
"""
**model_kwargs,
)
response = model.invoke("Can you recommend me a nice dry wine?")
"""
Optionally use `kwargs` to dynamically overwrite "generate()" settings. For example:
{
"temperature": 0.5, # default is the value in model_kwargs or 0.1 (initialization default)
"max_new_tokens": 1024, # default is the value in model_kwargs or 256 (initialization default)
}
"""
response = model.invoke("Can you recommend me a nice dry wine?", **kwargs)
print(response)
```

View File

@@ -13,6 +13,19 @@ Install the Python partner package:
pip install langchain-qdrant
```
## Embedding models
### FastEmbedSparse
```python
from langchain_qdrant import FastEmbedSparse
```
### SparseEmbeddings
```python
from langchain_qdrant import SparseEmbeddings
```
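`FastEmbedSparse` implements the `SparseEmbeddings` interface. A minimal sketch (assumes the `fastembed` package is installed; the model name is the FastEmbed BM25 default):
```python
from langchain_qdrant import FastEmbedSparse

sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
sparse_vector = sparse_embeddings.embed_query("What is sparse retrieval?")
```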
## Vector Store

View File

@@ -7,7 +7,9 @@
"source": [
"# RAGatouille\n",
"\n",
"[RAGatouille](https://github.com/bclavie/RAGatouille) makes it as simple as can be to use ColBERT! [ColBERT](https://github.com/stanford-futuredata/ColBERT) is a fast and accurate retrieval model, enabling scalable BERT-based search over large text collections in tens of milliseconds.\n",
">[RAGatouille](https://github.com/bclavie/RAGatouille) makes it as simple as can be to use `ColBERT`! [ColBERT](https://github.com/stanford-futuredata/ColBERT) is a fast and accurate retrieval model, enabling scalable BERT-based search over large text collections in tens of milliseconds.\n",
">\n",
">See the [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](https://arxiv.org/abs/2112.01488) paper.\n",
"\n",
"There are multiple ways that we can use RAGatouille.\n",
"\n",
@@ -258,7 +260,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,25 @@
# rank_bm25
[rank_bm25](https://github.com/dorianbrown/rank_bm25) is an open-source collection of algorithms
designed to query documents and return the most relevant ones, commonly used for creating
search engines.
See its [project page](https://github.com/dorianbrown/rank_bm25) for available algorithms.
## Installation and Setup
First, you need to install `rank_bm25` python package.
```bash
pip install rank_bm25
```
## Retriever
See a [usage example](/docs/integrations/retrievers/bm25).
```python
from langchain_community.retrievers import BM25Retriever
```
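A minimal sketch of building the retriever directly from raw texts:
```python
from langchain_community.retrievers import BM25Retriever

retriever = BM25Retriever.from_texts(
    ["foo", "bar", "world", "hello", "foo bar"]
)
docs = retriever.invoke("foo")
```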

View File

@@ -44,11 +44,12 @@ from langchain_community.vectorstores.vdms import VDMS_Client
from langchain_huggingface import HuggingFaceEmbeddings
client = VDMS_Client("localhost", 55555)
model_name = "sentence-transformers/all-mpnet-base-v2"
vectorstore = VDMS.from_documents(
docs,
client=client,
collection_name="langchain-demo",
embedding_function=HuggingFaceEmbeddings(),
embedding_function=HuggingFaceEmbeddings(model_name=model_name),
engine="FaissFlat"
distance_strategy="L2",
)
@@ -58,5 +59,3 @@ results = vectorstore.similarity_search(query)
```
For a more detailed walkthrough of the VDMS wrapper, see [this notebook](/docs/integrations/vectorstores/vdms)

View File

@@ -0,0 +1,24 @@
# Yahoo
>[Yahoo (Wikipedia)](https://en.wikipedia.org/wiki/Yahoo) is an American web services provider.
>
> It provides a web portal, search engine Yahoo Search, and related
> services, including `My Yahoo`, `Yahoo Mail`, `Yahoo News`,
> `Yahoo Finance`, `Yahoo Sports` and its advertising platform, `Yahoo Native`.
## Tools
### Yahoo Finance News
We have to install a python package:
```bash
pip install yfinance
```
See a [usage example](/docs/integrations/tools/yahoo_finance_news).
```python
from langchain_community.tools import YahooFinanceNewsTool
```
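A minimal sketch; the tool takes a company ticker as input:
```python
from langchain_community.tools import YahooFinanceNewsTool

tool = YahooFinanceNewsTool()
print(tool.invoke("NVDA"))  # ticker symbol is an example
```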

View File

@@ -31,3 +31,26 @@ See a [usage example](/docs/integrations/chat/yandex).
```python
from langchain_community.chat_models import ChatYandexGPT
```
## Embedding models
### YandexGPT
See a [usage example](/docs/integrations/text_embedding/yandex).
```python
from langchain_community.embeddings import YandexGPTEmbeddings
```
## Parser
### YandexSTTParser
It transcribes and parses audio files.
`YandexSTTParser` is similar to the `OpenAIWhisperParser`.
See a [usage example with OpenAIWhisperParser](/docs/integrations/document_loaders/youtube_audio).
```python
from langchain_community.document_loaders import YandexSTTParser
```

View File

@@ -0,0 +1,17 @@
# Yellowbrick
>[Yellowbrick](https://yellowbrick.com/) is a provider of
> Enterprise Data Warehousing, Ad-hoc and Streaming Analytics,
> BI and AI workloads.
## Vector store
We have to install a python package:
```bash
pip install psycopg2
```
```python
from langchain_community.vectorstores import Yellowbrick
```

View File

@@ -0,0 +1,19 @@
# You
>[You](https://you.com/about) is a company that provides an AI productivity platform.
## Retriever
See a [usage example](/docs/integrations/retrievers/you-retriever).
```python
from langchain_community.retrievers.you import YouRetriever
```
## Tools
See a [usage example](/docs/integrations/tools/you).
```python
from langchain_community.tools.you import YouSearchTool
```

View File

@@ -11,6 +11,8 @@
">[RAGatouille](https://github.com/bclavie/RAGatouille) makes it as simple as can be to use `ColBERT`!\n",
">\n",
">[ColBERT](https://github.com/stanford-futuredata/ColBERT) is a fast and accurate retrieval model, enabling scalable BERT-based search over large text collections in tens of milliseconds.\n",
">\n",
">See the [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](https://arxiv.org/abs/2112.01488) paper.\n",
"\n",
"We can use this as a [retriever](/docs/how_to#retrievers). It will show functionality specific to this integration. After going through, it may be useful to explore [relevant use-case pages](/docs/how_to#qa-with-rag) to learn how to use this vector store as part of a larger chain.\n",
"\n",

View File

@@ -0,0 +1,403 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Neo4j\n",
"\n",
">[Neo4j](https://neo4j.com/docs/) is a graph database that stores nodes and relationships, that also supports native vector search.\n",
"\n",
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a `Neo4j` vector store. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating a Neo4j vector store\n",
"First we'll want to create a Neo4j vector store and seed it with some data. We've created a small demo set of documents that contain summaries of movies."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: neo4j in /Users/moyi/git/langchain/env/lib/python3.11/site-packages (5.24.0)\n",
"Requirement already satisfied: pytz in /Users/moyi/git/langchain/env/lib/python3.11/site-packages (from neo4j) (2024.1)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade neo4j"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"OpenAI API Key: ········\n"
]
}
],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Neo4j URL: ········\n",
"Neo4j User Name: ········\n",
"Neo4j Password: ········\n"
]
}
],
"source": [
"# To run this notebook, you can set up a free neo4j account on neo4j.com and input the following information.\n",
"# (If you are having trouble connecting to the database, try using neo4j+ssc: instead of neo4j+s)\n",
"\n",
"os.environ[\"NEO4J_URI\"] = getpass.getpass(\"Neo4j URL:\")\n",
"os.environ[\"NEO4J_USERNAME\"] = getpass.getpass(\"Neo4j User Name:\")\n",
"os.environ[\"NEO4J_PASSWORD\"] = getpass.getpass(\"Neo4j Password:\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.vectorstores import Neo4jVector\n",
"from langchain_core.documents import Document\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: CALL subquery without a variable scope clause is now deprecated. Use CALL (row) { ... }} {position: line: 1, column: 21, offset: 20} for query: \"UNWIND $data AS row CALL { WITH row MERGE (c:`Chunk` {id: row.id}) WITH c, row CALL db.create.setNodeVectorProperty(c, 'embedding', row.embedding) SET c.`text` = row.text SET c += row.metadata } IN TRANSACTIONS OF 1000 ROWS \"\n"
]
}
],
"source": [
"docs = [\n",
" Document(\n",
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
" metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
" ),\n",
" Document(\n",
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
" ),\n",
" Document(\n",
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
" ),\n",
" Document(\n",
" page_content=\"Toys come alive and have a blast doing so\",\n",
" metadata={\"year\": 1995, \"genre\": \"animated\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
" metadata={\n",
" \"year\": 1979,\n",
" \"director\": \"Andrei Tarkovsky\",\n",
" \"genre\": \"science fiction\",\n",
" \"rating\": 9.9,\n",
" },\n",
" ),\n",
"]\n",
"vectorstore = Neo4jVector.from_documents(docs, embeddings)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating our self-querying retriever\n",
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",
"metadata_field_info = [\n",
" AttributeInfo(\n",
" name=\"genre\",\n",
" description=\"The genre of the movie\",\n",
" type=\"string or list[string]\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"year\",\n",
" description=\"The year the movie was released\",\n",
" type=\"integer\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"director\",\n",
" description=\"The name of the movie director\",\n",
" type=\"string\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
" ),\n",
"]\n",
"document_content_description = \"Brief summary of a movie\"\n",
"llm = OpenAI(temperature=0)\n",
"retriever = SelfQueryRetriever.from_llm(\n",
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Testing it out\n",
"And now we can try actually using our retriever!"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'genre': 'science fiction', 'year': 1993, 'rating': 7.7}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
" Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so'),\n",
" Document(metadata={'genre': 'science fiction', 'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
" Document(metadata={'year': 2006, 'rating': 8.6, 'director': 'Satoshi Kon'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example only specifies a relevant query\n",
"retriever.invoke(\"What are some movies about dinosaurs\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'genre': 'science fiction', 'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
" Document(metadata={'year': 2006, 'rating': 8.6, 'director': 'Satoshi Kon'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example only specifies a filter\n",
"retriever.invoke(\"I want to watch a movie rated higher than 8.5\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'year': 2019, 'rating': 8.3, 'director': 'Greta Gerwig'}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them')]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example specifies a query and a filter\n",
"retriever.invoke(\"Has Greta Gerwig directed any movies about women\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'year': 2006, 'rating': 8.6, 'director': 'Satoshi Kon'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),\n",
" Document(metadata={'genre': 'science fiction', 'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky'}, page_content='Three men walk into the Zone, three men walk out of the Zone')]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example specifies a composite filter\n",
"retriever.invoke(\"What's a highly rated (above 8.5) science fiction film?\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so')]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example specifies a query and composite filter\n",
"retriever.invoke(\n",
" \"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filter k\n",
"\n",
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
"\n",
"We can do this by passing `enable_limit=True` to the constructor."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"retriever = SelfQueryRetriever.from_llm(\n",
" llm,\n",
" vectorstore,\n",
" document_content_description,\n",
" metadata_field_info,\n",
" enable_limit=True,\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'genre': 'science fiction', 'year': 1993, 'rating': 7.7}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
" Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so')]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example only specifies a relevant query\n",
"retriever.invoke(\"what are two movies about dinosaurs\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -7,7 +7,7 @@
"source": [
"# BGE on Hugging Face\n",
"\n",
">[BGE models on the HuggingFace](https://huggingface.co/BAAI/bge-large-en) are [the best open-source embedding models](https://huggingface.co/spaces/mteb/leaderboard).\n",
">[BGE models on the HuggingFace](https://huggingface.co/BAAI/bge-large-en-v1.5) are one of [the best open-source embedding models](https://huggingface.co/spaces/mteb/leaderboard).\n",
">BGE model is created by the [Beijing Academy of Artificial Intelligence (BAAI)](https://en.wikipedia.org/wiki/Beijing_Academy_of_Artificial_Intelligence). `BAAI` is a private non-profit organization engaged in AI research and development.\n",
"\n",
"This notebook shows how to use `BGE Embeddings` through `Hugging Face`"

View File

@@ -1,89 +1,307 @@
{
"cells": [
{
"cell_type": "markdown",
"cell_type": "raw",
"id": "afaf8039",
"metadata": {},
"source": [
"# Google Vertex AI PaLM \n",
"---\n",
"sidebar_label: Google Vertex AI \n",
"keywords: [Vertex AI, vertexai , Google Cloud, embeddings]\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "9a3d6f34",
"metadata": {},
"source": [
"# Google Vertex AI Embeddings \n",
"\n",
">[Vertex AI PaLM API](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) is a service on Google Cloud exposing the embedding models. \n",
"This will help you get started with Google Vertex AI Embeddings models using LangChain. For detailed documentation on `Google Vertex AI Embeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/v0.2/api_reference/google_vertexai/embeddings/langchain_google_vertexai.embeddings.VertexAIEmbeddings.html).\n",
"\n",
"Note: This integration is separate from the Google PaLM integration.\n",
"## Overview\n",
"### Integration details\n",
"\n",
"By default, Google Cloud [does not use](https://cloud.google.com/vertex-ai/docs/generative-ai/data-governance#foundation_model_development) Customer Data to train its foundation models as part of Google Cloud`s AI/ML Privacy Commitment. More details about how Google processes data can also be found in [Google's Customer Data Processing Addendum (CDPA)](https://cloud.google.com/terms/data-processing-addendum).\n",
"| Provider | Package |\n",
"|:--------:|:-------:|\n",
"| [Google](https://python.langchain.com/v0.2/docs/integrations/platforms/google/) | [langchain-google-vertexai](https://python.langchain.com/v0.2/api_reference/google_vertexai/embeddings/langchain_google_vertexai.embeddings.VertexAIEmbeddings.html) |\n",
"\n",
"To use Vertex AI PaLM you must have the `langchain-google-vertexai` Python package installed and either:\n",
"- Have credentials configured for your environment (gcloud, workload identity, etc...)\n",
"- Store the path to a service account JSON file as the GOOGLE_APPLICATION_CREDENTIALS environment variable\n",
"## Setup\n",
"\n",
"This codebase uses the `google.auth` library which first looks for the application credentials variable mentioned above, and then looks for system-level auth.\n",
"To access Google Vertex AI Embeddings models you'll need to \n",
"- Create a Google Cloud account \n",
"- Install the `langchain-google-vertexai` integration package.\n",
"\n",
"For more information, see: \n",
"- https://cloud.google.com/docs/authentication/application-default-credentials#GAC\n",
"- https://googleapis.dev/python/google-auth/latest/reference/google.auth.html#module-google.auth\n",
"\n"
"\n",
"\n",
"\n",
"### Credentials\n",
"\n",
"\n",
"Head to [Google Cloud](https://cloud.google.com/free/) to sign up to create an account. Once you've done this set the GOOGLE_APPLICATION_CREDENTIALS environment variable:\n",
"\n",
"For more information, see:\n",
"\n",
"https://cloud.google.com/docs/authentication/application-default-credentials#GAC\n",
"https://googleapis.dev/python/google-auth/latest/reference/google.auth.html#module-google.auth"
]
},
{
"cell_type": "markdown",
"id": "caaba519-3476-423b-a5e4-d99a10929506",
"metadata": {},
"source": [
"**OPTIONAL : Authenticate your notebook environment (Colab only)**\n",
"\n",
"If you're running this notebook on Google Colab, run the cell below to authenticate your environment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"id": "b0770000-3667-439b-8c46-acc5af7c8e40",
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain langchain-google-vertexai"
"import sys\n",
"\n",
"if \"google.colab\" in sys.modules:\n",
" from google.colab import auth\n",
"\n",
" auth.authenticate_user()"
]
},
{
"cell_type": "markdown",
"id": "9fbd4a33-2480-4ad1-8d56-aec730b3662b",
"metadata": {},
"source": [
"**Set Google Cloud project information and initialize Vertex AI SDK**\n",
"\n",
"To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n",
"\n",
"Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "36521c2a",
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_vertexai import VertexAIEmbeddings"
"PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}\n",
"LOCATION = \"us-central1\" # @param {type:\"string\"}\n",
"\n",
"import vertexai\n",
"\n",
"vertexai.init(project=PROJECT_ID, location=LOCATION)"
]
},
{
"cell_type": "markdown",
"id": "c84fb993",
"metadata": {},
"source": [
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39a4953b",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
]
},
{
"cell_type": "markdown",
"id": "d9664366",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"The LangChain Google Vertex AI Embeddings integration lives in the `langchain-google-vertexai` package:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "64853226",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-google-vertexai"
]
},
{
"cell_type": "markdown",
"id": "45dd1724",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our model object and generate embeddings:\n",
">Check the list of [Supported Models](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#supported-models)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9ea7a09b",
"metadata": {},
"outputs": [],
"source": [
"embeddings = VertexAIEmbeddings()"
"from langchain_google_vertexai import VertexAIEmbeddings\n",
"\n",
"# Initialize the a specific Embeddings Model version\n",
"embeddings = VertexAIEmbeddings(model_name=\"text-embedding-004\")"
]
},
{
"cell_type": "markdown",
"id": "77d271b6",
"metadata": {},
"source": [
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our RAG tutorials under the [working with external knowledge tutorials](/docs/tutorials/#working-with-external-knowledge).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d817716b",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'LangChain is the framework for building context-aware reasoning applications'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"text = \"This is a test document.\""
"# Create a vector store with a sample text\n",
"from langchain_core.vectorstores import InMemoryVectorStore\n",
"\n",
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
"\n",
"vectorstore = InMemoryVectorStore.from_texts(\n",
" [text],\n",
" embedding=embeddings,\n",
")\n",
"\n",
"# Use the vectorstore as a retriever\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
"# Retrieve the most similar text\n",
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
"\n",
"# show the retrieved document's content\n",
"retrieved_documents[0].page_content"
]
},
{
"cell_type": "markdown",
"id": "e02b9855",
"metadata": {},
"source": [
"## Direct Usage\n",
"\n",
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
"\n",
"You can directly call these methods to get embeddings for your own use cases.\n",
"\n",
"### Embed single texts\n",
"\n",
"You can embed single texts or documents with `embed_query`:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "0d2befcd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.02831101417541504, 0.022063178941607475, -0.07454229146242142, 0.006448323838412762, 0.001955120\n"
]
}
],
"source": [
"single_vector = embeddings.embed_query(text)\n",
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "1b5a7d03",
"metadata": {},
"source": [
"### Embed multiple texts\n",
"\n",
"You can embed multiple texts with `embed_documents`:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2f4d6e97",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.01092718355357647, 0.01213780976831913, -0.05650627985596657, 0.006737854331731796, 0.0085973171\n",
"[0.010135706514120102, 0.01234869472682476, -0.07284046709537506, 0.00027134662377648056, 0.01546290\n"
]
}
],
"source": [
"query_result = embeddings.embed_query(text)"
"text2 = (\n",
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
")\n",
"two_vectors = embeddings.embed_documents([text, text2])\n",
"for vector in two_vectors:\n",
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "code",
"execution_count": 6,
"cell_type": "markdown",
"id": "98785c12",
"metadata": {},
"outputs": [],
"source": [
"doc_result = embeddings.embed_documents([text])"
"## API Reference\n",
"\n",
"For detailed documentation on `Google Vertex AI Embeddings\n",
"` features and configuration options, please refer to the [API reference](https://python.langchain.com/v0.2/api_reference/google_vertexai/embeddings/langchain_google_vertexai.embeddings.VertexAIEmbeddings.html).\n"
]
}
],
"metadata": {
"environment": {
"kernel": "python310",
"name": "tf2-gpu.2-6.m104",
"type": "gcloud",
"uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-6:m104"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -97,14 +315,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"vscode": {
"interpreter": {
"hash": "cc99336516f23363341912c6723b01ace86f02e26b4290be1efc0677e2e2ec24"
}
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
"nbformat_minor": 5
}

View File

@@ -36,7 +36,7 @@
"metadata": {},
"outputs": [],
"source": [
"embeddings = HuggingFaceEmbeddings()"
"embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")"
]
},
{

View File

@@ -17,9 +17,9 @@
"## Install Prerequisites\n",
"To benefit from IPEX-LLM on Intel GPUs, there are several prerequisite steps for tools installation and environment preparation.\n",
"\n",
"If you are a Windows user, visit the [Install IPEX-LLM on Windows with Intel GPU Guide](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_windows_gpu.html), and follow [Install Prerequisites](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_windows_gpu.html#install-prerequisites) to update GPU driver (optional) and install Conda.\n",
"If you are a Windows user, visit the [Install IPEX-LLM on Windows with Intel GPU Guide](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_windows_gpu.md), and follow [Install Prerequisites](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_windows_gpu.md#install-prerequisites) to update GPU driver (optional) and install Conda.\n",
"\n",
"If you are a Linux user, visit the [Install IPEX-LLM on Linux with Intel GPU](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_linux_gpu.html), and follow [**Install Prerequisites**](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_linux_gpu.html#install-prerequisites) to install GPU driver, Intel® oneAPI Base Toolkit 2024.0, and Conda.\n",
"If you are a Linux user, visit the [Install IPEX-LLM on Linux with Intel GPU](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md), and follow [**Install Prerequisites**](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md#install-prerequisites) to install GPU driver, Intel® oneAPI Base Toolkit 2024.0, and Conda.\n",
"\n",
"## Setup\n",
"\n",
@@ -105,7 +105,7 @@
">\n",
"> For the first time that each model runs on Intel iGPU/Intel Arc A300-Series or Pro A60, it may take several minutes to compile.\n",
">\n",
"> For other GPU type, please refer to [here](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Overview/install_gpu.html#runtime-configuration) for Windows users, and [here](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Overview/install_gpu.html#id5) for Linux users.\n",
"> For other GPU type, please refer to [here](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Overview/install_gpu.md#runtime-configuration) for Windows users, and [here](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Overview/install_gpu.md#runtime-configuration-1) for Linux users.\n",
"\n",
"\n",
"## Basic Usage\n",

View File

@@ -39,7 +39,9 @@
"volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run\n",
"\n",
"docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.6 --model-id $model --revision $revision\n",
"```"
"```\n",
"\n",
"Specifics on Docker usage might vary with the underlying hardware. For example, to serve the model on Intel Gaudi/Gaudi2 hardware, refer to the [tei-gaudi repository](https://github.com/huggingface/tei-gaudi) for the relevant docker run command."
]
},
{

View File

@@ -8,6 +8,8 @@
"# Google Cloud Text-to-Speech\n",
"\n",
">[Google Cloud Text-to-Speech](https://cloud.google.com/text-to-speech) enables developers to synthesize natural-sounding speech with 100+ voices, available in multiple languages and variants. It applies DeepMinds groundbreaking research in WaveNet and Googles powerful neural networks to deliver the highest fidelity possible.\n",
">\n",
">It supports multiple languages, including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi.\n",
"\n",
"This notebook shows how to interact with the `Google Cloud Text-to-Speech API` to achieve speech synthesis capabilities."
]
@@ -22,12 +24,38 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "0a309c0e-5310-4eaa-8af9-bcbc252e45da",
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet google-cloud-text-to-speech langchain-community"
"!pip install --upgrade langchain-google-community[texttospeech]"
]
},
{
"cell_type": "markdown",
"id": "5b86ad38-ac8a-4f0a-a492-01a6e3090c8c",
"metadata": {},
"source": [
"## Instantiation"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e2efded2-894b-4683-89ed-2a6948913fa9",
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-01T17:47:31.565221Z",
"iopub.status.busy": "2024-09-01T17:47:31.564804Z",
"iopub.status.idle": "2024-09-01T17:47:31.570600Z",
"shell.execute_reply": "2024-09-01T17:47:31.569764Z",
"shell.execute_reply.started": "2024-09-01T17:47:31.565188Z"
}
},
"outputs": [],
"source": [
"from langchain_google_community import TextToSpeechTool"
]
},
{
@@ -35,18 +63,34 @@
"id": "434b2454-2bff-484d-822c-4026a9dc1383",
"metadata": {},
"source": [
"## Usage"
"## Deprecated GoogleCloudTextToSpeechTool"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2f57a647-9214-4562-a8cf-f263a15d1f40",
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-01T17:51:28.763915Z",
"iopub.status.busy": "2024-09-01T17:51:28.763664Z",
"iopub.status.idle": "2024-09-01T17:51:28.779073Z",
"shell.execute_reply": "2024-09-01T17:51:28.778477Z",
"shell.execute_reply.started": "2024-09-01T17:51:28.763897Z"
}
},
"outputs": [],
"source": [
"from langchain_community.tools import GoogleCloudTextToSpeechTool"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f57a647-9214-4562-a8cf-f263a15d1f40",
"id": "a2647bc5-e494-41f9-9f53-4a278ea30cc1",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.tools import GoogleCloudTextToSpeechTool\n",
"\n",
"text_to_speak = \"Hello world!\"\n",
"\n",
"tts = GoogleCloudTextToSpeechTool()\n",

View File

@@ -0,0 +1,284 @@
{
"cells": [
{
"cell_type": "raw",
"id": "10238e62-3465-4973-9279-606cbb7ccf16",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Jina Search\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "a6f91f20",
"metadata": {},
"source": [
"# Jina Search\n",
"\n",
"This notebook provides a quick overview for getting started with Jina [tool](/docs/integrations/tools/). For detailed documentation of all Jina features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/community/tools/langchain_community.tools.jina_search.tool.JinaSearch.html).\n",
"\n",
"## Overview\n",
"\n",
"### Integration details\n",
"\n",
"| Class | Package | Serializable | JS support | Package latest |\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [JinaSearch](https://python.langchain.com/v0.2/api_reference/community/tools/langchain_community.tools.jina_search.tool.JinaSearch.html) | [langchain-community](https://python.langchain.com/v0.2/api_reference/community/) | ❌ | ❌ | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-community?style=flat-square&label=%20) |\n",
"\n",
"### Tool features\n",
"| [Returns artifact](/docs/how_to/tool_artifacts/) | Native async | Return data | Pricing |\n",
"| :---: | :---: | :---: | :---: |\n",
"| ❌ | ❌ | URL, Snippet, Title, Page Content | 1M response tokens free | \n",
"\n",
"\n",
"## Setup\n",
"\n",
"The integration lives in the `langchain-community` package and was added in version `0.2.16`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f85b4089",
"metadata": {},
"outputs": [],
"source": [
"%pip install --quiet -U \"langchain-community>=0.2.16\""
]
},
{
"cell_type": "markdown",
"id": "b15e9266",
"metadata": {},
"source": [
"### Credentials"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e0b178a2-8816-40ca-b57c-ccdd86dde9c9",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os"
]
},
{
"cell_type": "markdown",
"id": "bc5ab717-fd27-4c59-b912-bdd099541478",
"metadata": {},
"source": [
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a6c2f136-6367-4f1f-825d-ae741e1bf281",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
]
},
{
"cell_type": "markdown",
"id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"- TODO: Fill in instantiation params\n",
"\n",
"Here we show how to instantiate an instance of the Jina tool, with "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.tools import JinaSearch\n",
"\n",
"tool = JinaSearch()"
]
},
{
"cell_type": "markdown",
"id": "74147a1a",
"metadata": {},
"source": [
"## Invocation\n",
"\n",
"### [Invoke directly with args](/docs/concepts/#invoke-with-just-the-arguments)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "65310a8b-eb0c-4d9e-a618-4f4abe2414fc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{\"title\": \"LangGraph\", \"link\": \"https://www.langchain.com/langgraph\", \"snippet\": \"<strong>LangGraph</strong> helps teams of all sizes, across all industries, from ambitious startups to established enterprises. \\u201cLangChain is streets ahead with what they&#x27;ve put forward with <strong>LangGraph</strong>.\", \"content\": \"![Image 1](https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/667b080e4b3ca12dc5d5d439_Langgraph%20UI-2.webp)\\n\\nControllable cognitive architecture for any task\\n------------------------------------------------\\n\\nLangGraph's flexible API supports diverse control flows \\u2013 single agent, multi-agent, hierarchical, sequential \\u2013 and robustly handles realistic, complex scenarios.\\n\\nEnsure reliability with easy-to-add moderation and quality loops that prevent agents from veering off course.\\n\\n[See the docs](https://langchain-ai.github.io/langgraph/)\\n\\nDesigned for human-agent collaboration\\n--------------------------------------\\n\\nWith built-in stat\n"
]
}
],
"source": [
"print(tool.invoke({\"query\": \"what is langgraph\"})[:1000])"
]
},
{
"cell_type": "markdown",
"id": "d6e73897",
"metadata": {},
"source": [
"### [Invoke with ToolCall](/docs/concepts/#invoke-with-toolcall)\n",
"\n",
"We can also invoke the tool with a model-generated ToolCall, in which case a ToolMessage will be returned:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f90e33a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{\"title\": \"LangGraph Tutorial: What Is LangGraph and How to Use It?\", \"link\": \"https://www.datacamp.com/tutorial/langgraph-tutorial\", \"snippet\": \"<strong>LangGraph</strong> <strong>is</strong> a library within the LangChain ecosystem that provides a framework for defining, coordinating, and executing multiple LLM agents (or chains) in a structured and efficient manner.\", \"content\": \"Imagine you're building a complex, multi-agent large language model (LLM) application. It's exciting, but it comes with challenges: managing the state of various agents, coordinating their interactions, and handling errors effectively. This is where LangGraph can help.\\n\\nLangGraph is a library within the LangChain ecosystem designed to tackle these challenges head-on. LangGraph provides a framework for defining, coordinating, and executing multiple LLM agents (or chains) in a structured manner.\\n\\nIt simplifies the development process by enabling the creation of cyclical graphs, which are essential for de\n"
]
}
],
"source": [
"# This is usually generated by a model, but we'll create a tool call directly for demo purposes.\n",
"model_generated_tool_call = {\n",
" \"args\": {\"query\": \"what is langgraph\"},\n",
" \"id\": \"1\",\n",
" \"name\": tool.name,\n",
" \"type\": \"tool_call\",\n",
"}\n",
"tool_msg = tool.invoke(model_generated_tool_call)\n",
"print(tool_msg.content[:1000])"
]
},
{
"cell_type": "markdown",
"id": "659f9fbd-6fcf-445f-aa8c-72d8e60154bd",
"metadata": {},
"source": [
"## Chaining\n",
"\n",
"We can use our tool in a chain by first binding it to a [tool-calling model](/docs/how_to/tool_calling/) and then calling it:\n",
"\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
"\n",
"<ChatModelTabs customVarName=\"llm\" />\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af3123ad-7a02-40e5-b58e-7d56e23e5830",
"metadata": {},
"outputs": [],
"source": [
"# | output: false\n",
"# | echo: false\n",
"\n",
"# !pip install -qU langchain langchain-openai\n",
"from langchain.chat_models import init_chat_model\n",
"\n",
"llm = init_chat_model(model=\"gpt-4o\", model_provider=\"openai\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "fdbf35b5-3aaf-4947-9ec6-48c21533fb95",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"LangGraph is a library designed for building stateful, multi-actor applications with language models (LLMs). It is particularly useful for creating agent and multi-agent workflows. Compared to other LLM frameworks, LangGraph offers unique benefits such as cycles, controllability, and persistence. Here are some key points:\\n\\n1. **Stateful and Multi-Actor Applications**: LangGraph allows for the definition of flows involving cycles, essential for most agentic architectures. This is a significant differentiation from Directed Acyclic Graph (DAG)-based solutions.\\n\\n2. **Controllability**: The framework offers fine-grained control over both the flow and state of applications, which is crucial for creating reliable agents.\\n\\n3. **Persistence**: Built-in persistence is available, enabling advanced features like human-in-the-loop workflows and memory.\\n\\n4. **Human-in-the-Loop**: LangGraph supports interrupting graph execution for human approval or editing of the agent's next planned action.\\n\\n5. **Streaming Support**: The library can stream outputs as they are produced by each node, including token streaming.\\n\\n6. **Integration with LangChain**: While it integrates seamlessly with LangChain and LangSmith, LangGraph can also be used independently.\\n\\n7. **Inspiration and Interface**: LangGraph is inspired by systems like Pregel and Apache Beam, with its public interface drawing inspiration from NetworkX.\\n\\nLangGraph is designed to handle more complex agent applications that require cycles and state management, making it an ideal choice for developers seeking to build sophisticated LLM-driven applications. For more detailed information, you can visit their [official documentation](https://langchain-ai.github.io/langgraph/).\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 338, 'prompt_tokens': 14774, 'total_tokens': 15112}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_157b3831f5', 'finish_reason': 'stop', 'logprobs': None}, id='run-420d16ed-535c-41c6-8814-2186b42be0f8-0', usage_metadata={'input_tokens': 14774, 'output_tokens': 338, 'total_tokens': 15112})"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnableConfig, chain\n",
"\n",
"prompt = ChatPromptTemplate(\n",
" [\n",
" (\"system\", \"You are a helpful assistant.\"),\n",
" (\"human\", \"{user_input}\"),\n",
" (\"placeholder\", \"{messages}\"),\n",
" ]\n",
")\n",
"\n",
"\n",
"llm_with_tools = llm.bind_tools([tool])\n",
"llm_chain = prompt | llm_with_tools\n",
"\n",
"\n",
"@chain\n",
"def tool_chain(user_input: str, config: RunnableConfig):\n",
" input_ = {\"user_input\": user_input}\n",
" ai_msg = llm_chain.invoke(input_, config=config)\n",
" tool_msgs = tool.batch(ai_msg.tool_calls, config=config)\n",
" return llm_chain.invoke({**input_, \"messages\": [ai_msg, *tool_msgs]}, config=config)\n",
"\n",
"\n",
"tool_chain.invoke(\"what's langgraph\")"
]
},
{
"cell_type": "markdown",
"id": "4ac8146c",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all Jina features and configurations head to the API reference: https://python.langchain.com/v0.2/api_reference/community/tools/langchain_community.tools.jina_search.tool.JinaSearch.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "poetry-venv-311",
"language": "python",
"name": "poetry-venv-311"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -57,7 +57,8 @@
"from langchain_community.vectorstores import Annoy\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"\n",
"embeddings_func = HuggingFaceEmbeddings()"
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"embeddings_func = HuggingFaceEmbeddings(model_name=model_name)"
]
},
{

View File

@@ -7,7 +7,9 @@
"source": [
"# Faiss\n",
"\n",
">[Facebook AI Similarity Search (FAISS)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.\n",
">[Facebook AI Similarity Search (FAISS)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also includes supporting code for evaluation and parameter tuning.\n",
">\n",
">See [The FAISS Library](https://arxiv.org/pdf/2401.08281) paper.\n",
"\n",
"You can find the FAISS documentation at [this page](https://faiss.ai/).\n",
"\n",
@@ -528,7 +530,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -7,7 +7,9 @@
"source": [
"# Faiss (Async)\n",
"\n",
">[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.\n",
">[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also includes supporting code for evaluation and parameter tuning.\n",
">\n",
">See [The FAISS Library](https://arxiv.org/pdf/2401.08281) paper.\n",
"\n",
"[Faiss documentation](https://faiss.ai/).\n",
"\n",

View File

@@ -61,7 +61,8 @@
"docs = text_splitter.split_documents(documents)\n",
"\n",
"\n",
"embeddings = HuggingFaceEmbeddings()\n",
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"embeddings = HuggingFaceEmbeddings(model_name=model_name)\n",
"\n",
"db = ScaNN.from_documents(docs, embeddings)\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",

View File

@@ -45,7 +45,8 @@
"source": [
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"\n",
"embeddings = HuggingFaceEmbeddings()"
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"embeddings = HuggingFaceEmbeddings(model_name=model_name)"
]
},
{

View File

@@ -92,7 +92,8 @@
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = HuggingFaceEmbeddings()"
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"embeddings = HuggingFaceEmbeddings(model_name=model_name)"
]
},
{

View File

@@ -51,7 +51,8 @@
"raw_documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"documents = text_splitter.split_documents(raw_documents)\n",
"embeddings = HuggingFaceEmbeddings()\n",
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"embeddings = HuggingFaceEmbeddings(model_name=model_name)\n",
"db = TileDB.from_documents(\n",
" documents, embeddings, index_uri=\"/tmp/tiledb_index\", index_type=\"FLAT\"\n",
")"

View File

@@ -50,7 +50,8 @@
"raw_documents = TextLoader(\"state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"documents = text_splitter.split_documents(raw_documents)\n",
"embeddings = HuggingFaceEmbeddings()\n",
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"embeddings = HuggingFaceEmbeddings(model_name=model_name)\n",
"db = Vald.from_documents(documents, embeddings, host=\"localhost\", port=8080)"
]
},
@@ -197,7 +198,8 @@
"raw_documents = TextLoader(\"state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"documents = text_splitter.split_documents(raw_documents)\n",
"embeddings = HuggingFaceEmbeddings()\n",
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"embeddings = HuggingFaceEmbeddings(model_name=model_name)\n",
"\n",
"db = Vald.from_documents(\n",
" documents,\n",

View File

@@ -200,7 +200,8 @@
"\n",
"\n",
"# create the open-source embedding function\n",
"embedding = HuggingFaceEmbeddings()\n",
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"embedding = HuggingFaceEmbeddings(model_name=model_name)\n",
"print(\n",
" f\"# Embedding Dimensions: {len(embedding.embed_query('This is a test document.'))}\"\n",
")"

View File

@@ -479,8 +479,6 @@
"\n",
"```python\n",
"#!/usr/bin/env python\n",
"from typing import List\n",
"\n",
"from fastapi import FastAPI\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.output_parsers import StrOutputParser\n",
@@ -512,7 +510,6 @@
")\n",
"\n",
"# 5. Adding chain route\n",
"\n",
"add_routes(\n",
" app,\n",
" chain,\n",

View File

@@ -607,7 +607,7 @@
"```{=mdx}\n",
"<ChatModelTabs\n",
" customVarName=\"llm\"\n",
" anthropicParams={`\"model=\"claude-3-sonnet-20240229\", temperature=0.2, max_tokens=1024\"`}\n",
" anthropicParams={`model=\"claude-3-sonnet-20240229\", temperature=0.2, max_tokens=1024`}\n",
"/>\n",
"```\n",
"\n",
@@ -957,7 +957,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
"version": "3.11.5"
}
},
"nbformat": 4,

View File

@@ -337,8 +337,6 @@
}
],
"source": [
"from typing import List\n",
"\n",
"from langchain_core.documents import Document\n",
"from langchain_core.runnables import RunnableLambda\n",
"\n",

View File

@@ -130,7 +130,7 @@
"\n",
"2. `Map-reduce`: Summarize each document on its own in a \"map\" step and then \"reduce\" the summaries into a final summary (see [here](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.combine_documents.map_reduce.MapReduceDocumentsChain.html) for more on the `MapReduceDocumentsChain`, which is used for this method).\n",
"\n",
"Note that map-reduce is especially effective when understanding of a sub-document does not rely on preceeding context. For example, when summarizing a corpus of many, shorter documents. In other cases, such as summarizing a novel or body of text with an inherent sequence, [iterative refinement](/docs/how_to/summarize_refine) may be more effective."
"Note that map-reduce is especially effective when understanding of a sub-document does not rely on preceding context. For example, when summarizing a corpus of many, shorter documents. In other cases, such as summarizing a novel or body of text with an inherent sequence, [iterative refinement](/docs/how_to/summarize_refine) may be more effective."
]
},
{
@@ -346,7 +346,7 @@
"\n",
"Note that the map step is typically parallelized over the input documents.\n",
"\n",
"[LangGraph](https://langchain-ai.github.io/langgraph/), built on top of `langchain-core`, suports [map-reduce](https://langchain-ai.github.io/langgraph/how-tos/map-reduce/) workflows and is well-suited to this problem:\n",
"[LangGraph](https://langchain-ai.github.io/langgraph/), built on top of `langchain-core`, supports [map-reduce](https://langchain-ai.github.io/langgraph/how-tos/map-reduce/) workflows and is well-suited to this problem:\n",
"\n",
"- LangGraph allows for individual steps (such as successive summarizations) to be streamed, allowing for greater control of execution;\n",
"- LangGraph's [checkpointing](https://langchain-ai.github.io/langgraph/how-tos/persistence/) supports error recovery, extending with human-in-the-loop workflows, and easier incorporation into conversational applications.\n",

View File

@@ -397,7 +397,7 @@ class ArxivAPIWrapper(BaseModel):
def _format_doc_url(doc_path: str) -> str:
return f"https://{LANGCHAIN_PYTHON_URL}/{doc_path}"
return f"https://{LANGCHAIN_PYTHON_URL}/v0.2/{doc_path}"
def _format_api_ref_url(doc_path: str, compact: bool = False) -> str:
@@ -523,10 +523,9 @@ This page contains `arXiv` papers referenced in the LangChain Documentation, API
Templates, and Cookbooks.
From the opposite direction, scientists use `LangChain` in research and reference it in the research papers.
Here you find papers that reference:
- [LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header)
- [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header)
- [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
`arXiv` papers with references to:
[LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header) | [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header) | [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
## Summary
@@ -564,7 +563,7 @@ Here you find papers that reference:
refs += [
"`Cookbook:` "
+ ", ".join(
f"[{key}]({url})"
f"[{str(key).replace('_', ' ').title()}]({url})"
for key, url in paper.referencing_cookbook2url.items()
)
]
@@ -572,7 +571,7 @@ Here you find papers that reference:
title_link = f"[{paper.title}]({paper.url})"
f.write(
f"| {' | '.join([f'`{paper.arxiv_id}` {title_link}', ', '.join(paper.authors), paper.published_date, refs_str])}\n"
f"| {' | '.join([f'`{paper.arxiv_id}` {title_link}', ', '.join(paper.authors), paper.published_date.replace('-', '&#8209;'), refs_str])}\n"
)
for paper in papers:
@@ -607,9 +606,8 @@ Here you find papers that reference:
f"""
## {paper.title}
- **arXiv id:** [{paper.arxiv_id}]({paper.url}) **Published Date:** {paper.published_date}
- **Title:** {paper.title}
- **Authors:** {', '.join(paper.authors)}
- **arXiv id:** [{paper.arxiv_id}]({paper.url}) **Published Date:** {paper.published_date}
- **LangChain:**
{refs}

View File

@@ -66,19 +66,22 @@ def check_header_order(path: Path) -> None:
with open(path, "r") as f:
doc = f.read()
regex = r".*".join(headers)
if not re.search(regex, doc, re.DOTALL):
issueline = (
(
" Please see https://github.com/langchain-ai/langchain/issues/"
f"{issue_number} for instructions on how to correctly format a "
f"{doc_dir} integration page."
)
if isinstance(issue_number, int)
else ""
)
notfound = []
for header in headers:
index = doc.find(header)
if index == -1:
notfound.append(header)
doc = doc[index + len(header) :]
if notfound:
notfound_headers = "\n- ".join(notfound)
raise ValueError(
f"Document {path} does not match the expected header order.{issueline}"
f"Document {path} is missing headers:"
"\n- "
f"{notfound_headers}"
"\n\n"
"Please see https://github.com/langchain-ai/langchain/issues/"
f"{issue_number} for instructions on how to correctly format a "
f"{doc_dir} integration page."
)

View File

@@ -15,6 +15,7 @@ EXTERNAL_PACKAGES = {
"astradb",
"aws",
"cohere",
"databricks",
"elasticsearch",
"google-community",
"google-genai",

View File

@@ -62,6 +62,11 @@ SEARCH_TOOL_FEAT_TABLE = {
"available_data": "Answer",
"link": "/docs/integrations/tools/serpapi",
},
"Jina Search": {
"pricing": "1M Response Tokens Free",
"available_data": "URL, Snippet, Title, Page Content",
"link": "/docs/integrations/tools/jina_search/",
},
}
CODE_INTERPRETER_TOOL_FEAT_TABLE = {
@@ -71,6 +76,7 @@ CODE_INTERPRETER_TOOL_FEAT_TABLE = {
"upload": True,
"return_results": "Text",
"link": "/docs/integrations/tools/bearly",
"self_hosting": False,
},
"Riza Code Interpreter": {
"langauges": "Python, JavaScript, PHP, Ruby",
@@ -78,6 +84,7 @@ CODE_INTERPRETER_TOOL_FEAT_TABLE = {
"upload": False,
"return_results": "Text",
"link": "/docs/integrations/tools/riza",
"self_hosting": True,
},
"E2B Data Analysis": {
"langauges": "Python. In beta: JavaScript, R, Java",
@@ -85,6 +92,7 @@ CODE_INTERPRETER_TOOL_FEAT_TABLE = {
"upload": True,
"return_results": "Text, Images, Videos",
"link": "/docs/integrations/tools/e2b_data_analysis",
"self_hosting": True,
},
"Azure Container Apps dynamic sessions": {
"langauges": "Python",
@@ -92,6 +100,7 @@ CODE_INTERPRETER_TOOL_FEAT_TABLE = {
"upload": True,
"return_results": "Text, Images",
"link": "/docs/integrations/tools/azure_dynamic_sessions",
"self_hosting": False,
},
}
@@ -301,13 +310,14 @@ def get_search_tools_table() -> str:
def get_code_interpreter_table() -> str:
"""Get the table of search tools."""
"""Get the table of code interpreter tools."""
header = [
"tool",
"langauges",
"sandbox_lifetime",
"upload",
"return_results",
"self_hosting",
]
title = [
"Tool/Toolkit",
@@ -315,6 +325,7 @@ def get_code_interpreter_table() -> str:
"Sandbox Lifetime",
"Supports File Uploads",
"Return Types",
"Supports Self-Hosting",
]
rows = [title, [":-"] + [":-:"] * (len(title) - 1)]
for search_tool, feats in sorted(CODE_INTERPRETER_TOOL_FEAT_TABLE.items()):
@@ -324,7 +335,7 @@ def get_code_interpreter_table() -> str:
]
for h in header[1:]:
value = feats.get(h)
if h == "upload":
if h == "upload" or h == "self_hosting":
if value is True:
row.append("")
else:

View File

@@ -398,6 +398,7 @@ module.exports = {
{ type: "doc", id: "contributing/documentation/style_guide", className: "hidden" },
{ type: "doc", id: "contributing/documentation/setup", className: "hidden" },
"contributing/testing",
"contributing/review_process",
"contributing/faq",
],
collapsible: false,

View File

@@ -204,6 +204,17 @@ const FEATURE_TABLES = {
"multimodal": false,
"local": false,
"apiLink": "https://python.langchain.com/v0.2/api_reference/upstage/chat_models/langchain_upstage.chat_models.ChatUpstage.html"
},
{
"name": "ChatDatabricks",
"package": "langchain-databricks",
"link": "databricks",
"structured_output": true,
"tool_calling": true,
"json_mode": false,
"multimodal": false,
"local": false,
"apiLink": "https://python.langchain.com/v0.2/api_reference/upstage/chat_models/langchain_databricks.chat_models.ChatDatabricks.html"
}
],
},
@@ -347,6 +358,12 @@ const FEATURE_TABLES = {
package: "langchain-nomic",
apiLink: "https://python.langchain.com/v0.2/api_reference/nomic/embeddings/langchain_nomic.embeddings.NomicEmbeddings.html"
},
{
name: "Databricks",
link: "databricks",
package: "langchain-databricks",
apiLink: "https://python.langchain.com/v0.2/api_reference/nomic/embeddings/langchain_databricks.embeddings.DatabricksEmbeddings.html"
},
]
},
document_retrievers: {
@@ -890,7 +907,7 @@ const FEATURE_TABLES = {
{title: "Passes Standard Tests", formatter: (item) => item.passesStandardTests ? "✅" : "❌"},
{title: "Multi Tenancy", formatter: (item) => item.multiTenancy ? "✅" : "❌"},
{title: "IDs in add Documents", formatter: (item) => item.idsInAddDocuments ? "✅" : "❌"},
{title: "Local/Cloud", formatter: (item) => item.local ? "Local" : "Cloud"},
// {title: "Local/Cloud", formatter: (item) => item.local ? "Local" : "Cloud"},
],
items: [
{
@@ -945,13 +962,26 @@ const FEATURE_TABLES = {
local: true,
idsInAddDocuments: false,
},
{
name: "DatabricksVectorSearch",
link: "databricks_vector_search",
deleteById: true,
filtering: true,
searchByVector: true,
searchWithScore: true,
async: true,
passesStandardTests: false,
multiTenancy: false,
local: false,
idsInAddDocuments: false,
},
{
name: "ElasticsearchStore",
link: "elasticsearch",
deleteById: true,
filtering: true,
searchByVector: true,
searchWithScore: false,
searchWithScore: true,
async: true,
passesStandardTests: false,
multiTenancy: false,
@@ -973,7 +1003,7 @@ const FEATURE_TABLES = {
},
{
name: "InMemoryVectorStore",
link: "in_memory",
link: "https://python.langchain.com/v0.2/api_reference/core/vectorstores/langchain_core.vectorstores.in_memory.InMemoryVectorStore.html",
deleteById: true,
filtering: true,
searchByVector: false,
@@ -1012,7 +1042,7 @@ const FEATURE_TABLES = {
},
{
name: "PGVector",
link: "pg_vector",
link: "pgvector",
deleteById: true,
filtering: true,
searchByVector: true,

Binary image file added (161 KiB); contents not shown.

View File

@@ -4,6 +4,10 @@
"ignoreCommand": "bash ignore-step.sh",
"trailingSlash": true,
"rewrites": [
{
"source": "/v0.2/docs/integrations(/?)",
"destination": "/v0.2/docs/integrations/platforms/"
},
{
"source": "/v0.1",
"destination": "https://langchain-v01.vercel.app/v0.1"

View File

@@ -12,6 +12,9 @@ integration_test integration_tests: TEST_FILE = tests/integration_tests/
test tests:
poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)
test_watch:
poetry run ptw --snapshot-update --now . -- -vv $(TEST_FILE)
# integration tests are run without the --disable-socket flag to allow network calls
integration_test integration_tests:
poetry run pytest $(TEST_FILE)
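These targets run unit tests with `--disable-socket` so accidental network calls fail fast, while integration tests keep sockets open. A minimal sketch of the behavior being relied on, assuming pytest-socket's documented API:

```python
# Run with: pytest --disable-socket --allow-unix-socket
import socket

import pytest
from pytest_socket import SocketBlockedError


def test_network_calls_are_blocked():
    # Under --disable-socket, pytest-socket patches socket.socket so
    # that creating an INET socket raises, surfacing any unit test
    # that sneaks in a real network call.
    with pytest.raises(SocketBlockedError):
        socket.socket(socket.AF_INET, socket.SOCK_STREAM)
```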

View File

@@ -23,6 +23,7 @@ pytest = "^7.4.3"
pytest-asyncio = "^0.23.2"
pytest-socket = "^0.7.0"
langchain-core = { path = "../../core", develop = true }
pytest-watcher = "^0.3.4"
[tool.poetry.group.codespell]
optional = true

View File

@@ -22,7 +22,7 @@ integration_tests:
poetry run pytest $(TEST_FILE)
test_watch:
-poetry run ptw --disable-socket --allow-unix-socket --snapshot-update --now . -- -vv -x tests/unit_tests
+poetry run ptw --disable-socket --allow-unix-socket --snapshot-update --now . -- -vv tests/unit_tests
check_imports: $(shell find langchain_community -name '*.py')
poetry run python ./scripts/check_imports.py $^
@@ -45,7 +45,6 @@ lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test
lint lint_diff lint_package lint_tests:
./scripts/check_pydantic.sh .
./scripts/lint_imports.sh .
./scripts/check_pickle.sh .
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)

View File

@@ -16,7 +16,6 @@ cloudpickle>=2.0.0
cohere>=4,<6
databricks-vectorsearch>=0.21,<0.22
datasets>=2.15.0,<3
dedoc>=2.2.6,<3
dgml-utils>=0.3.0,<0.4
elasticsearch>=8.12.0,<9
esprima>=4.0.1,<5
@@ -92,3 +91,4 @@ xata>=1.0.0a7,<2
xmltodict>=0.13.0,<0.14
nanopq==0.2.1
mlflow[genai]>=2.14.0
databricks-sdk>=0.30.0

View File

@@ -25,7 +25,7 @@ from langchain_core.messages import (
SystemMessage,
ToolMessage,
)
-from langchain_core.pydantic_v1 import BaseModel
+from pydantic import BaseModel
from typing_extensions import Literal
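This import swap is the core mechanical change of the migration: schemas subclass `pydantic.BaseModel` directly instead of going through the `langchain_core.pydantic_v1` shim. A minimal sketch (the schema itself is illustrative, not from the diff):

```python
# Before (v0.2): from langchain_core.pydantic_v1 import BaseModel
# After (v0.3):  import from pydantic directly.
from pydantic import BaseModel, Field


class SearchArgs(BaseModel):
    """Arguments for a hypothetical search tool (illustrative only)."""

    query: str = Field(description="Free-text search query")
    limit: int = Field(default=10, description="Maximum number of results")
```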

View File

@@ -1,10 +1,10 @@
from __future__ import annotations
-from typing import TYPE_CHECKING, List, Literal, Optional
+from typing import TYPE_CHECKING, Any, List, Literal, Optional
-from langchain_core.pydantic_v1 import root_validator
from langchain_core.tools import BaseTool
from langchain_core.tools.base import BaseToolkit
+from pydantic import ConfigDict, model_validator
from langchain_community.tools.ainetwork.app import AINAppOps
from langchain_community.tools.ainetwork.owner import AINOwnerOps
@@ -36,8 +36,9 @@ class AINetworkToolkit(BaseToolkit):
network: Optional[Literal["mainnet", "testnet"]] = "testnet"
interface: Optional[Ain] = None
-@root_validator(pre=True)
-def set_interface(cls, values: dict) -> dict:
+@model_validator(mode="before")
+@classmethod
+def set_interface(cls, values: dict) -> Any:
"""Set the interface if not provided.
If the interface is not provided, attempt to authenticate with the
@@ -53,9 +54,10 @@ class AINetworkToolkit(BaseToolkit):
values["interface"] = authenticate(network=values.get("network", "testnet"))
return values
-class Config:
-    arbitrary_types_allowed = True
-    validate_all = True
+model_config = ConfigDict(
+    arbitrary_types_allowed=True,
+    validate_default=True,
+)
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""

View File

@@ -3,9 +3,9 @@ from __future__ import annotations
from typing import TYPE_CHECKING, List, Optional
from langchain_core.language_models import BaseLanguageModel
-from langchain_core.pydantic_v1 import Field
from langchain_core.tools import BaseTool
from langchain_core.tools.base import BaseToolkit
+from pydantic import ConfigDict, Field
from langchain_community.tools.amadeus.closest_airport import AmadeusClosestAirport
from langchain_community.tools.amadeus.flight_search import AmadeusFlightSearch
@@ -26,8 +26,9 @@ class AmadeusToolkit(BaseToolkit):
client: Client = Field(default_factory=authenticate)
llm: Optional[BaseLanguageModel] = Field(default=None)
-class Config:
-    arbitrary_types_allowed = True
+model_config = ConfigDict(
+    arbitrary_types_allowed=True,
+)
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""

View File

@@ -2,9 +2,9 @@
from typing import List
-from langchain_core.pydantic_v1 import Field
from langchain_core.tools import BaseTool
from langchain_core.tools.base import BaseToolkit
+from pydantic import ConfigDict, Field
from langchain_community.tools.cassandra_database.tool import (
GetSchemaCassandraDatabaseTool,
@@ -24,8 +24,9 @@ class CassandraDatabaseToolkit(BaseToolkit):
db: CassandraDatabase = Field(exclude=True)
-class Config:
-    arbitrary_types_allowed = True
+model_config = ConfigDict(
+    arbitrary_types_allowed=True,
+)
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""

View File

@@ -1,8 +1,8 @@
-from typing import List
+from typing import Any, List
-from langchain_core.pydantic_v1 import root_validator
from langchain_core.tools import BaseTool
from langchain_core.tools.base import BaseToolkit
+from pydantic import model_validator
from langchain_community.tools.connery import ConneryService
@@ -23,8 +23,9 @@ class ConneryToolkit(BaseToolkit):
"""
return self.tools
-@root_validator(pre=True)
-def validate_attributes(cls, values: dict) -> dict:
+@model_validator(mode="before")
+@classmethod
+def validate_attributes(cls, values: dict) -> Any:
"""
Validate the attributes of the ConneryToolkit class.

Some files were not shown because too many files have changed in this diff.