Compare commits

..

229 Commits

Author SHA1 Message Date
Harrison Chase
b077889860 make some changes 2024-09-02 14:25:45 -07:00
Harrison Chase
a0f5a6b944 cr 2024-09-02 14:14:13 -07:00
Harrison Chase
7067c4be7d Merge branch 'master' into v0.3.dev 2024-09-02 14:13:31 -07:00
Qingchuan Hao
3145995ed9 community[patch]: BingSearchResults returns raw snippets as artifact(#23304)
Returns an array of results, which is more specific and easier to use downstream.

Tested locally:
```
resp = tool.invoke("what's the weather like in Shanghai?")
for item in resp:
    print(item)
```
returns
```
{'snippet': '<b>Shanghai</b>, <b>Shanghai</b>, China <b>Weather</b> Forecast, with current conditions, wind, air quality, and what to expect for the next 3 days.', 'title': 'Shanghai, Shanghai, China Weather Forecast | AccuWeather', 'link': 'https://www.accuweather.com/en/cn/shanghai/106577/weather-forecast/106577'}
{'snippet': '5. 99 / 87 °F. 6. 99 / 86 °F. 7. Detailed forecast for 14 days. Need some help? Current <b>weather</b> <b>in Shanghai</b> and forecast for today, tomorrow, and next 14 days.', 'title': 'Weather for Shanghai, Shanghai Municipality, China - timeanddate.com', 'link': 'https://www.timeanddate.com/weather/china/shanghai'}
{'snippet': '<b>Shanghai</b> - <b>Weather</b> warnings issued 14-day forecast. <b>Weather</b> warnings issued. Forecast - <b>Shanghai</b>. Day by day forecast. Last updated Friday at 01:05. Tonight, ... Temperature feels <b>like</b> 34 ...', 'title': 'Shanghai - BBC Weather', 'link': 'https://www.bbc.com/weather/1796236'}
{'snippet': 'Current <b>weather</b> <b>in Shanghai</b>, <b>Shanghai</b>, China. Check current conditions <b>in Shanghai</b>, <b>Shanghai</b>, China with radar, hourly, and more.', 'title': 'Shanghai, Shanghai, China Current Weather | AccuWeather', 'link': 'https://www.accuweather.com/en/cn/shanghai/106577/current-weather/106577'}
13-Day Beijing, Xi&#39;an, Chengdu, <b>Shanghai</b> Chinese Language and Culture Immersion Tour. <b>Shanghai</b> in September. Average daily temperature range: 23–29°C (73–84°F) Average rainy days: 10. Average sunny days: 20. September ushers in pleasant autumn <b>weather</b>, making it one of the best months to visit <b>Shanghai</b>. <b>Weather</b> in <b>Shanghai</b>: Climate, Seasons, and Average Monthly Temperature. <b>Shanghai</b> has a subtropical maritime monsoon climate, meaning high humidity and lots of rain. Hot muggy summers, cool falls, cold winters with little snow, and warm springs are the norm. Midsummer through early fall is the best time to visit <b>Shanghai</b>. <b>Shanghai</b>, <b>Shanghai</b>, China <b>Weather</b> Forecast, with current conditions, wind, air quality, and what to expect for the next 3 days. 1165. 45.9. 121. Winter, from December to February, is quite cold: the average January temperature is 5 °C (41 °F). There may be cold periods, with highs around 5 °C (41 °F) or below, and occasionally, even snow can fall. The temperature dropped to -10 °C (14 °F) in January 1977 and to -7 °C (19.5 °F) in January 2016. 5. 99 / 87 °F. 6. 99 / 86 °F. 7. Detailed forecast for 14 days. Need some help? Current <b>weather</b> in <b>Shanghai</b> and forecast for today, tomorrow, and next 14 days. Everything you need to know about today&#39;s <b>weather</b> in <b>Shanghai</b>, <b>Shanghai</b>, China. High/Low, Precipitation Chances, Sunrise/Sunset, and today&#39;s Temperature History. <b>Shanghai</b> - <b>Weather</b> warnings issued 14-day forecast. <b>Weather</b> warnings issued. Forecast - <b>Shanghai</b>. Day by day forecast. Last updated Friday at 01:05. Tonight, ... Temperature feels <b>like</b> 34 ... <b>Shanghai</b> 14 Day Extended Forecast. <b>Weather</b> Today <b>Weather</b> Hourly 14 Day Forecast Yesterday/Past <b>Weather</b> Climate (Averages) Currently: 84 °F. Passing clouds. (<b>Weather</b> station: <b>Shanghai</b> Hongqiao Airport, China). See more current <b>weather</b>. Current <b>weather</b> in <b>Shanghai</b>, <b>Shanghai</b>, China. Check current conditions in <b>Shanghai</b>, <b>Shanghai</b>, China with radar, hourly, and more. <b>Shanghai</b> <b>Weather</b> Forecasts. <b>Weather Underground</b> provides local &amp; long-range <b>weather</b> forecasts, weatherreports, maps &amp; tropical <b>weather</b> conditions for the <b>Shanghai</b> area.
```
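A minimal sketch of how the new artifact can be consumed (assumes `BING_SUBSCRIPTION_KEY` and `BING_SEARCH_URL` are configured; invoking with a tool-call-shaped dict is the usual way to get a `ToolMessage` that carries the artifact):
```python
from langchain_community.tools.bing_search import BingSearchResults
from langchain_community.utilities import BingSearchAPIWrapper

# Sketch only: build the tool and invoke it with a ToolCall-shaped dict so the
# result comes back as a ToolMessage with both string content and the artifact.
tool = BingSearchResults(api_wrapper=BingSearchAPIWrapper())

tool_call = {
    "name": tool.name,
    "args": {"query": "what's the weather like in Shanghai?"},
    "id": "call_1",
    "type": "tool_call",
}
msg = tool.invoke(tool_call)
print(msg.content)   # concatenated snippet string
print(msg.artifact)  # list of {'snippet': ..., 'title': ..., 'link': ...} dicts
```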

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-09-02 21:11:32 +00:00
venkatram-dev
a09e2afee4 typo_summarization_tutorial (#25938)
docs: fix a couple of typos in summarization_tutorial
2024-09-02 13:44:11 -07:00
Alexander KIRILOV
6a8f8a56ac community[patch]: added content_columns option to CSVLoader (#23809)
**Description:**
Adds a new `content_columns` option to the CSVLoader that allows us to explicitly specify the columns used for generating the Document content. Currently these are implicitly set to "all fields not part of the `metadata_columns`".

In some cases, however, it is useful to have a field both as metadata and as part of the document content.
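A minimal sketch of the new option (the file name and column names below are made up for illustration):
```python
from langchain_community.document_loaders import CSVLoader

# Sketch only: "name" feeds both the page content and the metadata,
# while "price" appears only in metadata.
loader = CSVLoader(
    file_path="products.csv",
    content_columns=("name", "description"),
    metadata_columns=["name", "price"],
)
docs = loader.load()
print(docs[0].page_content)  # built from "name" and "description"
print(docs[0].metadata)      # includes "name" and "price"
```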
2024-09-02 20:25:53 +00:00
Bruno Alvisio
ab527027ac community: Resolve refs recursively when generating openai_fn from OpenAPI spec (#19002)
- **Description:** This PR is intended to improve the generation of payloads for OpenAI functions when converting from an OpenAPI spec file. The solution is to recursively resolve `$ref`s.
Currently, when converting OpenAPI specs into OpenAI functions using `openapi_spec_to_openai_fn`, if the schemas have nested references, the generated functions contain `$ref` entries, which cause the LLM to generate payloads with an incorrect schema.

For example, for the following OpenAPI spec:

```
text = """
{
  "openapi": "3.0.3",
  "info": {
    "title": "Swagger Petstore - OpenAPI 3.0",
    "termsOfService": "http://swagger.io/terms/",
    "contact": {
      "email": "apiteam@swagger.io"
    },
    "license": {
      "name": "Apache 2.0",
      "url": "http://www.apache.org/licenses/LICENSE-2.0.html"
    },
    "version": "1.0.11"
  },
  "externalDocs": {
    "description": "Find out more about Swagger",
    "url": "http://swagger.io"
  },
  "servers": [
    {
      "url": "https://petstore3.swagger.io/api/v3"
    }
  ],
  "tags": [
    {
      "name": "pet",
      "description": "Everything about your Pets",
      "externalDocs": {
        "description": "Find out more",
        "url": "http://swagger.io"
      }
    },
    {
      "name": "store",
      "description": "Access to Petstore orders",
      "externalDocs": {
        "description": "Find out more about our store",
        "url": "http://swagger.io"
      }
    },
    {
      "name": "user",
      "description": "Operations about user"
    }
  ],
  "paths": {
    "/pet": {
      "post": {
        "tags": [
          "pet"
        ],
        "summary": "Add a new pet to the store",
        "description": "Add a new pet to the store",
        "operationId": "addPet",
        "requestBody": {
          "description": "Create a new pet in the store",
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/Pet"
              }
            }
          },
          "required": true
        },
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/Pet"
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "Tag": {
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "format": "int64"
          },
          "model_type": {
            "type": "number"
          }
        }
      },
      "Category": {
        "type": "object",
        "required": [
          "model",
          "year",
          "age"
        ],
        "properties": {
          "year": {
            "type": "integer",
            "format": "int64",
            "example": 1
          },
          "model": {
            "type": "string",
            "example": "Ford"
          },
          "age": {
            "type": "integer",
            "example": 42
          }
        }
      },
      "Pet": {
        "required": [
          "name"
        ],
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "format": "int64",
            "example": 10
          },
          "name": {
            "type": "string",
            "example": "doggie"
          },
          "category": {
            "$ref": "#/components/schemas/Category"
          },
          "tags": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Tag"
            }
          },
          "status": {
            "type": "string",
            "description": "pet status in the store",
            "enum": [
              "available",
              "pending",
              "sold"
            ]
          }
        }
      }
    }
  }
}
"""
```

Executing:
```
spec = OpenAPISpec.from_text(text)
pet_openai_functions, pet_callables = openapi_spec_to_openai_fn(spec)
response = model.invoke("Create a pet named Scott", functions=pet_openai_functions)
```

`pet_openai_functions` contains unresolved `$ref`s:

```
[
  {
    "name": "addPet",
    "description": "Add a new pet to the store",
    "parameters": {
      "type": "object",
      "properties": {
        "json": {
          "properties": {
            "id": {
              "type": "integer",
              "schema_format": "int64",
              "example": 10
            },
            "name": {
              "type": "string",
              "example": "doggie"
            },
            "category": {
              "ref": "#/components/schemas/Category"
            },
            "tags": {
              "items": {
                "ref": "#/components/schemas/Tag"
              },
              "type": "array"
            },
            "status": {
              "type": "string",
              "enum": [
                "available",
                "pending",
                "sold"
              ],
              "description": "pet status in the store"
            }
          },
          "type": "object",
          "required": [
            "name",
            "photoUrls"
          ]
        }
      }
    }
  }
]
```

and the generated JSON has an incorrect schema (e.g., `category` is filled with `id` and `name` instead of `model`, `year`, and `age`):

```
{
  "id": 1,
  "name": "Scott",
  "category": {
    "id": 1,
    "name": "Dogs"
  },
  "tags": [
    {
      "id": 1,
      "name": "tag1"
    }
  ],
  "status": "available"
}
```

With this change, `pet_openai_functions` becomes:

```
[
  {
    "name": "addPet",
    "description": "Add a new pet to the store",
    "parameters": {
      "type": "object",
      "properties": {
        "json": {
          "properties": {
            "id": {
              "type": "integer",
              "schema_format": "int64",
              "example": 10
            },
            "name": {
              "type": "string",
              "example": "doggie"
            },
            "category": {
              "properties": {
                "year": {
                  "type": "integer",
                  "schema_format": "int64",
                  "example": 1
                },
                "model": {
                  "type": "string",
                  "example": "Ford"
                },
                "age": {
                  "type": "integer",
                  "example": 42
                }
              },
              "type": "object",
              "required": [
                "model",
                "year",
                "age"
              ]
            },
            "tags": {
              "items": {
                "properties": {
                  "id": {
                    "type": "integer",
                    "schema_format": "int64"
                  },
                  "model_type": {
                    "type": "number"
                  }
                },
                "type": "object"
              },
              "type": "array"
            },
            "status": {
              "type": "string",
              "enum": [
                "available",
                "pending",
                "sold"
              ],
              "description": "pet status in the store"
            }
          },
          "type": "object",
          "required": [
            "name"
          ]
        }
      }
    }
  }
]
```

and the JSON generated by the LLM is:
```
{
  "id": 1,
  "name": "Scott",
  "category": {
    "year": 2022,
    "model": "Dog",
    "age": 42
  },
  "tags": [
    {
      "id": 1,
      "model_type": 1
    }
  ],
  "status": "available"
}
```

which has the intended schema.

- **Twitter handle:** @brunoalvisio

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2024-09-02 13:17:39 -07:00
Nuno Campos
464dae8ac2 core: Include global variables in variables found by get_function_nonlocals (#25936)
2024-09-02 11:49:25 -07:00
Luiz F. G. dos Santos
36bbdc776e community: fix bug to support for file_search tool from OpenAI (#25927)
- **Description:** The function `_is_assistants_builtin_tool` didn't support `file_search` from OpenAI, which caused a conflict and blocked its use. The OpenAI Assistants API changed from `retrieval` to `file_search`.
  
  The following code
  
  ```
agent = OpenAIAssistantV2Runnable.create_assistant(
    name="Data Analysis Assistant",
    instructions=prompt[0].content,
    tools={'type': 'file_search'},
    model=self.chat_config.connection.deployment_name,
    client=llm,
    as_agent=True,
    tool_resources={
        "file_search": {
            "vector_store_ids": vector_store_id
        }
    }
)
```

Was throwing the following error

```
Traceback (most recent call last):
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/chat_decorators.py",
line 500, in get_response
    return await super().get_response(post, context)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/chat_decorators.py",
line 96, in get_response
    response = await self.inner_chat.get_response(post, context)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/chat_decorators.py",
line 96, in get_response
    response = await self.inner_chat.get_response(post, context)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/chat_decorators.py",
line 96, in get_response
    response = await self.inner_chat.get_response(post, context)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  [Previous line repeated 4 more times]
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/chat/azure_open_ai_chat.py",
line 147, in get_response
chain = chain_factory.get_chain(prompts, post.conversation.id,
overrides, context)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/Documents/codes/shellai-nlp-backend/app/llm_connections/chains.py",
line 1324, in get_chain
    agent = OpenAIAssistantV2Runnable.create_assistant(
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_community/agents/openai_assistant/base.py",
line 256, in create_assistant
tools=[_get_assistants_tool(tool) for tool in tools], # type: ignore
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_community/agents/openai_assistant/base.py",
line 256, in <listcomp>
tools=[_get_assistants_tool(tool) for tool in tools], # type: ignore
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_community/agents/openai_assistant/base.py",
line 119, in _get_assistants_tool
    return convert_to_openai_tool(tool)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_core/utils/function_calling.py",
line 255, in convert_to_openai_tool
    function = convert_to_openai_function(tool)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/l.guedesdossantos/anaconda3/envs/shell-e/lib/python3.11/site-packages/langchain_core/utils/function_calling.py",
line 230, in convert_to_openai_function
    raise ValueError(
ValueError: Unsupported function

{'type': 'file_search'}

Functions must be passed in as Dict, pydantic.BaseModel, or Callable. If
they're a dict they must either be in OpenAI function format or valid
JSON schema with top-level 'title' and 'description' keys.
```

With the proposed changes, this is fixed and the function now supports `file_search`. This was the only place missing support for `file_search`.
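A minimal usage sketch after the fix (the instructions, model name, and vector store id below are placeholders, not taken from the PR):
```python
from langchain_community.agents.openai_assistant import OpenAIAssistantV2Runnable

# Sketch only: with "file_search" recognized as a built-in tool, it is passed
# through as-is instead of being routed through convert_to_openai_tool.
agent = OpenAIAssistantV2Runnable.create_assistant(
    name="Data Analysis Assistant",
    instructions="Answer questions using the attached files.",  # placeholder
    tools=[{"type": "file_search"}],
    model="gpt-4o-mini",  # placeholder model/deployment name
    as_agent=True,
    tool_resources={"file_search": {"vector_store_ids": ["vs_abc123"]}},  # placeholder id
)
```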
  
Reference doc:
https://platform.openai.com/docs/assistants/tools/file-search

- **Twitter handle:** luizf0992

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2024-09-02 18:21:51 +00:00
Jacob Lee
f49cce739b 👥 Update LangChain people data (#25917)
👥 Update LangChain people data

Co-authored-by: github-actions <github-actions@github.com>
2024-09-02 11:14:35 -07:00
Leonid Ganeline
96b99a5022 docs: integrations google missed references (#25923)
Added missed integration links. Fixed inconsistent formatting.
2024-09-02 11:14:18 -07:00
Leonid Ganeline
086556d466 docs: integrations reference updates 14 (#25928)
Added missed provider pages and links. Fixed inconsistent formatting.
2024-09-02 11:07:45 -07:00
Tyler Wray
1ff8c36aa6 docs: fix pgvector link (#25930)
- **Description:** pg_vector link is 404'ing. This fixes it.
2024-09-02 18:03:19 +00:00
xander-art
6cd452d985 Feature/update hunyuan (#25779)
Description:
- Add system templates and user templates in integration testing
- Initialize the response id field value to request_id
- Adjust the default model to hunyuan-pro
- Remove the default values of Temperature and TopP
- Add SystemMessage

All the integration tests have passed.
1. Running the integration tests for the first time:
<img width="1359" alt="71ca77a2-e9be-4af6-acdc-4d665002bd9b"
src="https://github.com/user-attachments/assets/9298dc3a-aa26-4bfa-968b-c011a4e699c9">

2. Running the integration tests a second time:
<img width="1501" alt="image"
src="https://github.com/user-attachments/assets/61335416-4a67-4840-bb89-090ba668e237">

Issue: None
Dependencies: None
Twitter handle: None

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-09-02 12:55:08 +00:00
Yuwen Hu
566e9ba164 community: add Intel GPU support to ipex-llm llm integration (#22458)
**Description:** [IPEX-LLM](https://github.com/intel-analytics/ipex-llm)
is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local
PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low
latency. This PR adds Intel GPU support to `ipex-llm` llm integration.
**Dependencies:** `ipex-llm`
**Contribution maintainer**: @ivy-lv11 @Oscilloscope98
**tests and docs**: 
- Add: langchain/docs/docs/integrations/llms/ipex_llm_gpu.ipynb
- Update: langchain/docs/docs/integrations/llms/ipex_llm_gpu.ipynb
- Update: langchain/libs/community/tests/llms/test_ipex_llm.py

---------

Co-authored-by: ivy-lv11 <zhicunlv@gmail.com>
2024-09-02 08:49:08 -04:00
Erick Friis
839ca670c1 x 2024-09-01 21:58:51 -07:00
Erick Friis
d0d4033ff7 x 2024-09-01 21:57:24 -07:00
Erick Friis
47215c344b ruff lint 2024-09-01 21:43:23 -07:00
Erick Friis
917047a4b0 fmt 2024-09-01 21:41:14 -07:00
Erick Friis
2865a5c627 Merge branch 'master' into v0.3.dev 2024-09-01 21:41:07 -07:00
Bagatur
d19e074374 core[patch]: handle serializable fields that cant be converted to bool (#25903) 2024-09-01 16:44:33 -07:00
Kirushikesh DB
7f857a02d5 docs: HuggingFace pipeline returns the prompt if return_full_text is not set (#25916)
Thank you for contributing to LangChain!

**Description:**
The current documentation for using Hugging Face with LangChain needs to set `return_full_text=False`; otherwise the pipeline returns both the prompt and the response as output by default.


Code to reproduce:
```python
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain_core.messages import (
    HumanMessage,
    SystemMessage,
)

llm = HuggingFacePipeline.from_model_id(
    model_id="microsoft/Phi-3.5-mini-instruct",
    task="text-generation",
    pipeline_kwargs=dict(
        max_new_tokens=512,
        do_sample=False,
        repetition_penalty=1.03,
        # return_full_text=False
    ),
    device=0
)

chat_model = ChatHuggingFace(llm=llm)

messages = [
    SystemMessage(content="You're a helpful assistant"),
    HumanMessage(
        content="What happens when an unstoppable force meets an immovable object?"
    ),
]

ai_msg = chat_model.invoke(messages)
print(ai_msg.content)
```
Output:
```
<|system|>
You're a helpful assistant<|end|>
<|user|>
What happens when an unstoppable force meets an immovable object?<|end|>
<|assistant|>
 The scenario of an "unstoppable force" meeting an "immovable object" is a classic paradox that has puzzled philosophers, scientists, and thinkers for centuries. In physics, however, there are no such things as truly unstoppable forces or immovable objects because all physical entities have mass and interact with other masses through fundamental forces (like gravity).

When we consider the laws of motion, particularly Newton's third law which states that for every action, there is an equal and opposite reaction, it becomes clear that if one were to exist, the other would necessarily be negated by the interaction. For example, if you push against a solid wall with great force, the wall exerts an equal and opposite force back on you, preventing your movement.

In theoretical discussions, this paradox often serves as a thought experiment to explore concepts like determinism versus free will, the limits of physical laws, and the nature of reality itself. However, in practical terms, any force applied to an object will result in some form of deformation, transfer of energy, or movement, depending on the properties of both the force and the object.

So while the idea of an unstoppable force and an immovable object remains a fascinating philosophical conundrum, it does not hold up under the scrutiny of physical laws as we understand them.
```

---------

Co-authored-by: Kirushikesh D B kirushi@ibm.com <kirushi@cccxl012.pok.ibm.com>
2024-09-01 13:52:20 -07:00
Yuxi Zheng
38dfde6946 docs: fix typo in Cassandra for ./cookbook/cql_agent.ipynb (#25922)

Co-authored-by: "syd" <zheng.yuxi@outlook.com>
2024-09-01 20:51:47 +00:00
Borahm Lee
9cdb99bd60 docs: remove unused imports in Tutorials Basics (#25919)
## Description

- `List` is not explicitly used, so the unnecessary import is removed.
2024-09-01 20:51:00 +00:00
Erick Friis
8732cfc6ef docs: review process gh discussion (#25921) 2024-09-01 17:20:46 +00:00
Erick Friis
08b9715845 docs: pr review process (#25899)
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
2024-09-01 16:51:12 +00:00
ccurme
60054db1c4 infra[patch]: remove together from scheduled tests (#25909)
These now run in https://github.com/langchain-ai/langchain-together
2024-08-31 18:43:16 +00:00
Emmanuel Leroy
654da27255 improve llamacpp embeddings (#12972)
- **Description:**
Improve llamacpp embedding class by adding the `device` parameter so it
can be passed to the model and used with `gpu`, `cpu` or Apple metal
(`mps`).
Improve performance by making use of the bulk client api to compute
embeddings in batches.
  
  - **Dependencies:** none
  - **Tag maintainer:** 
@hwchase17

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-31 18:27:59 +00:00
Sandeep Bhandari
f882824eac Update tool_choice.ipynb spelling mistake of select (#25907)
2024-08-31 12:36:32 +00:00
ℍ𝕠𝕝𝕝𝕠𝕨 𝕄𝕒𝕟
64b62f6ae4 community[neo4j_vector]: make embedding dimension check optional (#25737)
**Description:**

Starting from Neo4j 5.23 (22 August 2024), with vector-2.0 indexes, `vector.dimensions` is no longer required to be set, which causes a "key does not exist" error in the index config when it is absent.

Since the presence of `vector.dimensions` only enables additional checks, this commit makes the embedding dimension check optional and only performs it when the value exists (is not None).

https://neo4j.com/release-notes/database/neo4j-5/

**Twitter handle:** @HollowM186

Signed-off-by: Hollow Man <hollowman@opensuse.org>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-31 12:36:20 +00:00
Christophe Bornet
0a752a74cc community[patch], docs: Add API reference doc for GraphVectorStore (#25751) 2024-08-30 17:42:00 -07:00
Bagatur
28e2ec7603 ollama[patch]: Release 0.1.3 (#25902) 2024-08-31 00:11:45 +00:00
Bagatur
ca1c3bd9c0 community[patch]: bump + fix core dep (#25901) 2024-08-30 15:54:07 -07:00
Bagatur
fabe32c06d core[patch]: Release 0.2.37 (#25900) 2024-08-30 22:29:12 +00:00
Richmond Alake
9992a1db43 cookbook: AI Agent Built With LangChain and FireWorksAI (#22609)
- **AI Agent Built With LangChain and FireWorksAI**: community notebook
- **Description:** Added a new AI agent in the cookbook folder that integrates prompt compression using LLMLingua and arXiv retrieval tools. The agent is designed to optimize the efficiency and performance of research tasks by compressing lengthy prompts and retrieving relevant academic papers. The agent also makes use of MongoDB to store conversational history and as its knowledge base, using the MongoDB vector store.
- **Twitter handle:** https://x.com/richmondalake

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-08-30 22:19:17 +00:00
mehdiosa
c6f00e6bdc community: Fix branch not being considered when using GithubFileLoader (#20075)
- **Description:** Added the `ref` query parameter so data is loaded from any branch passed, not only from the default branch.
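A minimal sketch of the fixed behavior (the repository, branch, and token below are placeholders):
```python
from langchain_community.document_loaders import GithubFileLoader

# Sketch only: with this fix the `branch` value is sent as the `ref` query
# parameter, so files are read from that branch rather than the default one.
loader = GithubFileLoader(
    repo="octocat/Hello-World",          # placeholder repository
    branch="dev",                        # any branch, not just the default
    access_token="<GITHUB_TOKEN>",       # placeholder token
    file_filter=lambda path: path.endswith(".md"),
)
docs = loader.load()
```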

---------

Co-authored-by: Osama Mehdi <mehdi@hm.edu>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-30 21:47:11 +00:00
Leonid Ganeline
54d2b861f6 docs: integrations reference updates 12 (#25676)
Added missed provider pages and links. Fixed inconsistent formatting.

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-30 21:25:42 +00:00
Aditya
c8b1c3a7e7 docs: update documentation for Vertex Embeddings Models (#25745)
- **Description:** update documentation for Vertex Embeddings Models
- **Issue:** N/A
- **Dependencies:** N/A
- **Twitter handle:** N/A

---------

Co-authored-by: adityarane@google.com <adityarane@google.com>
2024-08-30 13:58:21 -07:00
Alex Sherstinsky
617a4e617b community: Fix a bug in handling kwargs overwrites in Predibase integration, and update the documentation. (#25893)
2024-08-30 12:41:42 -07:00
Erick Friis
28f6ff6fcd docs: remove incorrect vectorstore local column (#25895) 2024-08-30 18:54:51 +00:00
Anush
ade4bfdff1 qdrant: Updated class check in Self-Query Retriever factory (#25877)
## Description

- Updates the self-query retriever factory to check for the new Qdrant vector store class, i.e. `langchain_qdrant.QdrantVectorStore`.
- Deprecates `QdrantSparseVectorRetriever`, since the vector store implementation now supports sparse retrieval natively.

Resolves #25798
2024-08-30 12:11:55 -04:00
Djordje
862ef32fdc community: Fixed infinity embeddings async request (#25882)
**Description:** Fix async infinity embeddings
**Issue:** #24942  

@baskaryan, @ccurme
2024-08-30 12:10:34 -04:00
rainsubtime
f75d5621e2 community:Fix a bug of LLM in moonshot (#25878)
- **Description:** When using the Moonshot LLM integration, the error "'Moonshot' object has no attribute '_client'" occurs because `_client` is private in pydantic v1.0 and cannot be used. Renaming `_client` to `client` resolves the error.
- **Issue:** #24390
- **Dependencies:** none
- **Twitter handle:** @Rainsubtime
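A minimal sketch of the integration this fixes (the API key below is a placeholder; it can also be supplied via the `MOONSHOT_API_KEY` environment variable):
```python
from langchain_community.llms.moonshot import Moonshot

# Sketch only: before this fix, invoking the model raised
# "'Moonshot' object has no attribute '_client'".
llm = Moonshot(model="moonshot-v1-8k", moonshot_api_key="sk-...")  # placeholder key
print(llm.invoke("Say hello in one sentence."))
```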




- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Co-authored-by: Cyue <Cyue_work2001@163.com>
2024-08-30 16:09:39 +00:00
ZhangShenao
fd0f147df3 Improvement[Community] Add tool-calling test case for ChatZhipuAI (#25884)
- Add tool-calling test case for `ChatZhipuAI`
2024-08-30 12:05:43 -04:00
k.muto
5bb810c5c6 docs: updated args_schema to be required when using callback handlers in custom tools. (#25887)
- **Description:** If you use callback handlers with a custom tool, `run_manager` is added to the input, so you need to explicitly specify `args_schema`. I was confused because this was not documented, so I added it. Also, it seems that the type does not work with `pydantic.BaseModel`.
- **Issue:** None
- **Dependencies:** None
2024-08-30 12:04:40 -04:00
默奕
6377185291 add neo4j query constructor for self query (#25288)
- [x] **PR title - community: add neo4j query constructor for self
query**

- [x] **PR message**
- **Description:** adds a Neo4jTranslator so that the Neo4j vector database can be used with SelfQueryRetriever
    - **Issue:** this issue had been raised before in #19748
    - **Dependencies:** none
    - **Twitter handle:** @moyi_dang
- P.S. I have not added the query constructor to BUILTIN_TRANSLATORS in this PR; I want to make changes to only one package at a time.


---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-30 14:54:33 +00:00
Ohad Eytan
b5d670498f partners/milvus: allow creating a vectorstore with sparse embeddings (#25284)
# Description
Milvus (and `pymilvus`) recently added the option to use [sparse
vectors](https://milvus.io/docs/sparse_vector.md#Sparse-Vector) with
appropriate search methods (e.g., `SPARSE_INVERTED_INDEX`) and
embeddings (e.g., `BM25`, `SPLADE`).

This PR allows creating a vector store using LangChain's `Milvus` class, setting the matching vector field type to `DataType.SPARSE_FLOAT_VECTOR` and the default index type to `SPARSE_INVERTED_INDEX`.

It only extends functionality and is backward compatible.

## Note
I am also interested in extending the Milvus class further to support multi-vector search (aka hybrid search) and will be happy to discuss that. See
[here](https://github.com/langchain-ai/langchain/discussions/19955),
[here](https://github.com/langchain-ai/langchain/pull/20375), and
[here](https://github.com/langchain-ai/langchain/discussions/22886)
for similar needs.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-30 02:30:23 +00:00
Erick Friis
09b04c7e3b "community: release 0.2.15" (#25867) 2024-08-30 02:18:48 +00:00
Erick Friis
f7e62754a1 community: undo azure_ad_access_token breaking change (#25818) 2024-08-30 02:06:14 +00:00
Leonid Ganeline
6047138379 docs: arxiv reference updates (#24949)
Added: arxiv references to the concepts page.
Regenerated: arxiv references page.
Improved: formatting of the concepts page (moved the Partner packages
section after langchain_community)
2024-08-29 18:51:18 -07:00
Bagatur
1759ff5836 infra: rm together lagnchain test dp (#25866) 2024-08-30 00:59:53 +00:00
Erick Friis
24f0c232fe docs: elastic feature (#25865) 2024-08-30 00:55:16 +00:00
Erick Friis
1640872059 together: mv to external repo (#25863) 2024-08-29 16:42:59 -07:00
Michael Paciullo
e7c856c298 langchain_openai: Add "strict" parameter to OpenAIFunctionsAgent (#25862)
- **Description:** OpenAI recently introduced a "strict" parameter for
[structured outputs in their
API](https://openai.com/index/introducing-structured-outputs-in-the-api/).
An optional `strict` parameter has been added to
`create_openai_functions_agent()` and `create_openai_tools_agent()` so
developers can use this feature in those agents.
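A minimal sketch of passing the new parameter (the tool, prompt, and model name below are illustrative assumptions, not from the PR):
```python
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI


@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)
llm = ChatOpenAI(model="gpt-4o-mini")  # placeholder model name

# strict=True asks OpenAI to enforce the tool JSON schemas exactly.
agent = create_openai_tools_agent(llm, [add], prompt, strict=True)
executor = AgentExecutor(agent=agent, tools=[add])
print(executor.invoke({"input": "What is 2 + 3?"}))
```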

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-29 22:27:07 +00:00
Bagatur
fabd3295fa core[patch]: dont mutate merged lists/dicts (#25858)
Update merging utils to
- not mutate objects
- have special handling for 'type' keys in dicts
2024-08-29 20:34:54 +00:00
Kyle Winkelman
09c2d8faca langchain_openai: Cleanup OpenAIEmbeddings validate_environment. (#25855)
**Description:** [This portion of code](https://github.com/langchain-ai/langchain/blob/v0.1.16/libs/partners/openai/langchain_openai/embeddings/base.py#L189-L196) has no effect, as a couple of lines later a [`ValueError` is thrown](https://github.com/langchain-ai/langchain/blob/v0.1.16/libs/partners/openai/langchain_openai/embeddings/base.py#L209-L213).
**Issue:** A follow-up to #25852.
2024-08-29 13:54:43 -04:00
Kyle Winkelman
201bdf7148 community: Cap AzureOpenAIEmbeddings chunk_size at 2048 instead of 16. (#25852)
**Description:** Within AzureOpenAIEmbeddings there is a validation to
cap `chunk_size` at 16. The value of 16 is either an old limitation or
was erroneously chosen. I have checked all of the `preview` and `stable`
releases to ensure that the `embeddings` endpoint can handle 2048
entries
[Azure/azure-rest-api-specs](https://github.com/Azure/azure-rest-api-specs/tree/main/specification/cognitiveservices/data-plane/AzureOpenAI/inference).
I have also found many locations that confirm this limit should be 2048:
-
https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
-
https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits

**Issue:** fixes #25462
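A minimal sketch of the effect of the new cap (assumes Azure OpenAI credentials and an embeddings deployment are configured, e.g. via environment variables):
```python
from langchain_community.embeddings import AzureOpenAIEmbeddings

# Sketch only: with the cap raised to 2048 inputs per request, embedding a
# large batch needs far fewer round trips than with the old limit of 16.
embeddings = AzureOpenAIEmbeddings(chunk_size=2048)
vectors = embeddings.embed_documents([f"document {i}" for i in range(5000)])
print(len(vectors))
```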
2024-08-29 16:48:04 +00:00
Leonid Ganeline
08c9c683a7 docs: integrations reference updates 6 (#25188)
Added missed provider pages. Added missed references to the integration
components.
2024-08-29 09:17:41 -07:00
Allan Ascencio
a8af396a82 added octoai test (#21793)
**PR title**: community: add tests for ChatOctoAI

**Description:** Added unit tests for the ChatOctoAI class in the community package to ensure proper validation and default values. These tests verify the correct initialization of fields, the handling of missing required parameters, and the proper setting of aliases.
**Issue:** N/A
**Dependencies:** None

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
2024-08-29 15:07:27 +00:00
Param Singh
69f9acb60f premai[patch]: Standardize premai params (#21513)
Thank you for contributing to LangChain!

community:premai[patch]: standardize init args

- updated `temperature` with Pydantic Field, updated the unit test.
- updated `max_tokens` with Pydantic Field, updated the unit test.
- updated `max_retries` with Pydantic Field, updated the unit test.

Related to #20085

---------

Co-authored-by: Isaac Francisco <78627776+isahers1@users.noreply.github.com>
Co-authored-by: ccurme <chester.curme@gmail.com>
2024-08-29 11:01:28 -04:00
Guangdong Liu
fcf9230257 community(sparkllm): Add function call support in Sparkllm chat model. (#20607)
- **Description:** Add function call support in Sparkllm chat model.
Related documents
https://www.xfyun.cn/doc/spark/Web.html#_2-function-call%E8%AF%B4%E6%98%8E
- @baskaryan

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
2024-08-29 14:38:39 +00:00
ChengZi
37f5ba416e partners[milvus]: fix issue when metadata_schema is None (#25836)
fix issue when metadata_schema is None

Signed-off-by: ChengZi <chen.zhang@zilliz.com>
2024-08-29 10:11:09 -04:00
ccurme
426333ff6f infra[patch]: remove AI21 from scheduled tests (#25847)
These now run in https://github.com/langchain-ai/langchain-ai21
2024-08-29 14:03:20 +00:00
Jorge Piedrahita Ortiz
9ac953a948 Community: sambastudio embeddings GenericV2 API support (#25064)
- **Description:**
  - SambaStudio GenericV2 API support
  - Minor changes for requests error handling
2024-08-29 09:52:49 -04:00
Sam Jove
bdce9a47d0 community[patch]: callback before yield for _astream (gigachat) (#25834)
Description: Moves yield to after callback for _astream for gigachat in
the community package
Issue: #16913

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-29 13:29:28 +00:00
Jinoos Lee
703af9ffe3 Patch enable to use Amazon OpenSearch Serverless(aoss) for Semantic Cache store (#25833)
- [x] **PR title**: "community: Patch enable to use Amazon OpenSearch
Serverless for Semantic Cache store"

- [x] **PR message**: 
- **Description:** The OpenSearchSemanticCache class now supports Amazon OpenSearch Serverless as a semantic cache store; it only requires passing an auth (`http_auth`) parameter to the initializer.
- **Dependencies:** none
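A minimal sketch of passing auth through to the cache (the endpoint, auth object, and embeddings below are placeholders/assumptions):
```python
from langchain_community.cache import OpenSearchSemanticCache
from langchain_community.embeddings import FakeEmbeddings
from langchain_core.globals import set_llm_cache

# Sketch only: `auth` would typically be an opensearch-py AWSV4SignerAuth built
# for the "aoss" service when targeting an OpenSearch Serverless collection.
auth = ...  # placeholder for an http_auth object

set_llm_cache(
    OpenSearchSemanticCache(
        opensearch_url="https://<collection-id>.<region>.aoss.amazonaws.com",  # placeholder
        embedding=FakeEmbeddings(size=1536),  # placeholder embeddings model
        http_auth=auth,
    )
)
```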


---------

Co-authored-by: Jinoos Lee <jinoos@amazon.com>
2024-08-29 13:28:22 +00:00
William FH
1ad621120d docs: Update langgraph 0.2.0 checkpointer import path (#25205)
And fix the description for timeout
2024-08-28 19:32:08 -07:00
Andrew Benton
c410545075 docs: add self-hosting row to code interpreter tools table (#25303)
**Description:** Add information about self-hosting support to the code
interpreter tools table.
**Issue:** N/A
**Dependencies:** N/A
2024-08-28 19:30:12 -07:00
Eugene Yurtsev
83327ac43a docs: Fix typo in openai llm integration notebook (#25492)
Fix typo in openai LLM integration notebook.
2024-08-28 19:22:57 -07:00
Leonid Ganeline
31f55781b3 docs: added ColBERT reference (#25452)
Added references to the source papers.
Fixed URL verification code.
Improved arXive page formatting.
Regenerated arXiv page.
2024-08-28 19:05:44 -07:00
Mikhail Khludnev
a017f49fd3 comminity[patch]: fix #25575 YandexGPTs for _grpc_metadata (#25617)
it fixes two issues:

### YGPTs are broken #25575

```
File ....conda/lib/python3.11/site-packages/langchain_community/embeddings/yandex.py:211, in _make_request(self, texts, **kwargs)
..
--> 211 res = stub.TextEmbedding(request, metadata=self._grpc_metadata)  # type: ignore[attr-defined]

AttributeError: 'YandexGPTEmbeddings' object has no attribute '_grpc_metadata'
```
My gut feeling is that #23841 is the cause.

I had to drop the leading underscore from `_grpc_metadata` as a quick fix, but I just don't know how to do it in a properly _pydantic_ way.

### minor issue:

if we use `api_key` (which is not the best practice), the code fails with

```
File ~/git/...../python3.11/site-packages/langchain_community/embeddings/yandex.py:119, in YandexGPTEmbeddings.validate_environment(cls, values)
...

AttributeError: 'tuple' object has no attribute 'append'
```

- Added a new integration test, but it requires a YGPT environment and an active account; I don't know how integration tests are enabled/disabled in CI.
- Added small unit tests with mocks. Should be fine.

---------

Co-authored-by: mikhail-khludnev <mikhail_khludnev@rntgroup.com>
2024-08-28 18:48:10 -07:00
Serena Ruan
850bf89e48 community[patch]: Support passing extra params for executing functions in UCFunctionToolkit (#25652)
Support passing extra params when executing UC functions:
The params should be a dictionary with the key EXECUTE_FUNCTION_ARG_NAME. The assumption is that the function itself doesn't use such a variable name (starting and ending with double underscores), and if it does we raise an Exception.
If invalid params are passed to execute_statement, we raise an Exception as well.



---------

Signed-off-by: Serena Ruan <serena.rxy@gmail.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-08-28 18:47:32 -07:00
崔浩
3555882a0d community[patch]: optimize xinference llm import (#25809)
- **Description:** import RESTfulClient from xinference_client when importing xinference is not possible.
    - **Dependencies:** xinference_client
- **Why do so:** the full xinference package (pip install xinference[all]) is too heavy to install, and apart from RESTfulClient it is not needed by LangChain users. The modification keeps consistency with the xinference embeddings
[embeddings/xinference](../blob/master/libs/community/langchain_community/embeddings/xinference.py#L89).
2024-08-29 01:41:43 +00:00
Michael Rubél
9decd0b243 langchain[patch]: fix moderation chain init (#25778)
[This commit](d3ca2cc8c3) broke the moderation chain, so we faced a crash when migrating LangChain from v0.1 to v0.2.

The issue appears to be that the class attribute the code refers to doesn't hold the value processed in the `validate_environment` method. We had `extras={}` in this attribute, and it was cast to `True` when it should have been `False`. Adding a simple assignment seems to resolve the issue, though I'm not sure it's the right way.

---

---------

Co-authored-by: Michael Rubél <mrubel@oroinc.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-08-28 18:41:31 -07:00
Madhu Shantan
63a1569d5f docs: fixed syntax error in ChatAnthropic Example - rag app tutorial notebook (#25824)
- **PR title**: docs: fixed syntax error in ChatAnthropic Example - rag app tutorial notebook - generation

- **Description:** Fixed a syntax error in the ChatAnthropic initialization example in the RAG tutorial notebook. The original code had an extra set of quotation marks around the model parameter, which would cause a Python syntax error. The corrected version removes these unnecessary quotes.

- **Dependencies:** No new dependencies required for this documentation fix. I've verified that the corrected code is syntactically valid and matches the expected format for initializing a ChatAnthropic instance in LangChain.
- **Twitter handle:** madhu_shantan

The error in the Jupyter notebook:
<img width="1189" alt="Screenshot 2024-08-29 at 12 43 47 AM"
src="https://github.com/user-attachments/assets/07148a93-300f-40e2-ad4a-ac219cbb56a4">

The corrected cell:
<img width="983" alt="Screenshot 2024-08-29 at 12 44 18 AM"
src="https://github.com/user-attachments/assets/75b1455a-3671-454e-ac16-8ca77c049dbd">

- **Lint and test:** As this is a documentation-only change, I have not run the full test suite. However, I have verified that the corrected code example is syntactically valid and matches the expected usage of the ChatAnthropic class.

The error in the docs is here:
<img width="1020" alt="Screenshot 2024-08-29 at 12 48 36 AM"
src="https://github.com/user-attachments/assets/812ccb20-b411-4a5b-afc1-41742efb32a7">
2024-08-29 01:31:01 +00:00
Erick Friis
e5ae988505 prompty: bump core version (#25831) 2024-08-28 23:06:13 +00:00
Erick Friis
c8b8335b82 core: prompt variable error msg (#25787) 2024-08-28 22:54:00 +00:00
ccurme
ff168aaec0 prompty: release 0.0.3 (#25830) 2024-08-28 15:52:17 -07:00
Matthieu
783397eacb community: avoid double templating in langchain_prompty (#25777)
## Description

In `langchain_prompty`, messages are templated by Prompty. However, a
call to `ChatPromptTemplate` was initiating a second templating. We now
convert parsed messages to `Message` objects before calling
`ChatPromptTemplate`, signifying clearly that they are already
templated.

We also revert #25739, which applied to this second templating (which we now avoid) and did not fix the original issue.

## Issue

Closes #25703
2024-08-28 18:18:18 -04:00
ccurme
afe8ccaaa6 community[patch]: Add ID field back to Azure AI Search results (#25828)
Commandeering https://github.com/langchain-ai/langchain/pull/23243 as
maintainers don't have ability to modify that PR.

Fixes https://github.com/langchain-ai/langchain/issues/22827

---------

Co-authored-by: Ming Quah <fleetadmiralbutter@icloud.com>
2024-08-28 17:56:50 -04:00
rbrugaro
9fa172bc26 add links in example nb with tei/tgi references (#25821)
I have validated that the LangChain interface with TEI/TGI works as expected when TEI and TGI are running on Intel Gaudi2. Adding some references to the notebooks to help users find relevant info.

---------

Co-authored-by: Rita Brugarolas <rbrugaro@idc708053.jf.intel.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-28 21:33:25 +00:00
Erick Friis
8fb594fd2a ai21: migrate to external repo (#25827) 2024-08-28 14:24:07 -07:00
Erick Friis
095b712a26 ollama: bump core version (#25826) 2024-08-28 12:31:16 -07:00
Erick Friis
5db6c6d96d community: release 0.2.14 (#25822) 2024-08-28 19:05:53 +00:00
Erick Friis
d6c4803ab0 core: release 0.2.36 (#25819) 2024-08-28 18:04:51 +00:00
Erick Friis
5186325bc7 partners/ollama: release 0.1.2 (#25817)
release for #25697
2024-08-28 17:47:32 +00:00
Rohit Gupta
aff50a1e6f milvus: add array data type for collection create (#23219)
Add array data type for milvus vector store collection create



---------

Signed-off-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Rohit Gupta <rohit.gupta2@walmart.com>
Co-authored-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-28 16:55:57 +00:00
Cillian Berragan
754f3c41f9 community: add score to PineconeHybridSearchRetriever (#25781)
**Description:**

Adds the 'score' returned by Pinecone to the
`PineconeHybridSearchRetriever` list of returned Documents.

There is currently no way to return the score when using Pinecone hybrid
search, so in this PR I include it by default.

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-28 13:11:06 +00:00
ZhangShenao
3f1d652f15 Improvement[Community] Improve api doc for PineconeHybridSearchRetriever (#25803)
- Complete missing args in api doc
2024-08-28 08:38:56 -04:00
Moritz Schlager
555f97becb community[patch]: fix model initialization bug for deepinfra (#25727)
### Description
adds an init method to ChatDeepInfra to set the model_name attribute
according to the argument
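A hedged sketch of the kind of fix described here (a plain pydantic model stands in for the real `ChatDeepInfra` class):

```python
# Hedged sketch only, not the actual ChatDeepInfra source: forward a
# user-supplied model name so it is not silently replaced by the class default.
from typing import Optional

from pydantic import BaseModel


class ChatDeepInfraSketch(BaseModel):
    # default named in the description below
    model_name: str = "meta-llama/Llama-2-70b-chat-hf"

    def __init__(self, model_name: Optional[str] = None, **kwargs):
        if model_name is not None:
            kwargs["model_name"] = model_name
        super().__init__(**kwargs)


# The user-supplied model is now kept instead of falling back to the default.
assert (
    ChatDeepInfraSketch(model_name="meta-llama/Llama-3-70b-Instruct").model_name
    == "meta-llama/Llama-3-70b-Instruct"
)
```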
### Issue
Currently, the model_name specified by the user during initialization of
the ChatDeepInfra class is never set. Therefore, it always chooses the
default model (meta-llama/Llama-2-70b-chat-hf; however, since this is
probably deprecated, it likely always uses meta-llama/Llama-3-70b-Instruct).
We stumbled across this issue and fixed it as proposed in this pull
request. Feel free to change the fix according to your coding guidelines
and style; this is just a proposal, and we want to draw attention to this
problem.
### Dependencies
no additional dependencies required

Feel free to contact me or @timo282 and @finitearth if you have any
questions.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
2024-08-28 02:02:35 -07:00
Bagatur
a052173b55 together[patch]: Release 0.1.6 (#25805) 2024-08-28 02:01:49 -07:00
Bagatur
b0ac6fe8d3 community[patch]: Release 0.2.13 (#25806) 2024-08-28 08:57:49 +00:00
Bagatur
85aef7641c openai[patch]: Release 0.1.23 (#25804) 2024-08-28 08:52:08 +00:00
Bagatur
0d3fd0aeb9 langchain[patch]: Release 0.2.15 (#25802) 2024-08-28 08:35:00 +00:00
zysoong
25a6790e1a community[patch]: Minor Improvement of extract hyperlinks tool output (#25728)
**Description:** Make each hyperlink appear only once in the
extract_hyperlinks tool output (for some websites the output contains the
meaningless '#' hyperlink many times, which inflates the context window
tokens without any advantage); see the sketch below.
**Issue:** None
**Dependencies:** None
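A minimal sketch of the de-duplication described above (not the actual tool implementation):

```python
# Minimal sketch, not the actual extract_hyperlinks code: keep each hyperlink
# only once while preserving the order of first appearance.
def dedupe_links(links: list[str]) -> list[str]:
    seen: set[str] = set()
    unique: list[str] = []
    for link in links:
        if link not in seen:
            seen.add(link)
            unique.append(link)
    return unique


print(dedupe_links(["#", "https://example.com", "#", "#"]))
# ['#', 'https://example.com']
```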
2024-08-28 08:02:40 +00:00
Christophe Bornet
ff0df5ea15 core[patch]: Add B(bugbear) ruff rules (#25520)
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-08-28 07:09:29 +00:00
Isaac Francisco
d5ddaac1fc docs minor fix (#25794) 2024-08-28 04:14:36 +00:00
ccurme
3c784e10a8 docs: improve docs for InMemoryVectorStore (#25786)
Closes https://github.com/langchain-ai/langchain/issues/25775
2024-08-27 21:12:32 -07:00
Erick Friis
1023fbc98a databricks: mv to partner repo (#25788) 2024-08-27 18:51:17 -07:00
ccurme
2e5c379632 openai[patch]: fix get_num_tokens for function calls (#25785)
Closes https://github.com/langchain-ai/langchain/issues/25784

See additional discussion
[here](0a4ee864e9 (r145147380)).
2024-08-27 20:18:19 +00:00
Erick Friis
2aa35d80a0 docs, infra: cerebras docs, update docs template linting with better error (#25782) 2024-08-27 17:19:59 +00:00
venkatram-dev
48b579f6b5 date_time_parser (#25763)
Thank you for contributing to LangChain!

- [x] **PR title**: "langchain: Chains: query_constructor: add date time
parser"

- [x] **PR message**: 
- **Description:** add date time parser to langchain Chains
query_constructor
    - **Issue:** https://github.com/langchain-ai/langchain/issues/25526


- [ ] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [ ] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.
2024-08-27 13:18:52 -04:00
Tomaz Bratanic
f359e6b0a5 Add mmr to neo4j vector (#25765) 2024-08-27 08:55:19 -04:00
pazshalev
995305fdd5 test: fix tool calling integration tests for AI21 Jamba models (#25771)
Ignore specific integration tests that handle specific tool-calling
cases that will soon be fixed.
2024-08-27 08:54:51 -04:00
Luis Valencia
99f9a664a5 community: Azure Search Vector Store is missing Access Token Authentication (#24330)
Added Azure Search Access Token Authentication instead of API KEY auth.
Fixes Issue: https://github.com/langchain-ai/langchain/issues/24263
Dependencies: None
Twitter: @levalencia

@baskaryan

Could you please review? First time creating a PR that fixes some code.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-26 15:41:50 -07:00
Leonid Ganeline
49b0bc7b5a docs: integrations reference updates 5 (#25151)
Added missing references. Added missing provider pages.

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-26 15:12:39 -04:00
ZhangShenao
44e3e2391c Improvement[Community] Improve methods in IMessageChatLoader (#25746)
- Add @staticmethod to static methods in `IMessageChatLoader`.
- Format arg names.
2024-08-26 14:20:22 -04:00
Erick Friis
815f59dba5 partners/ai21: release 0.1.8 (#25759) 2024-08-26 18:02:43 +00:00
amirai21
17dffd9741 AI21: tools calling support in Langchain (#25635)
This pull request introduces support for the AI21 tool-calling feature,
available with the Jamba-1.5 models. When Jamba-1.5 detects the need
to invoke a provided tool, as indicated by the 'tools' parameter passed
to the model:

```
class ToolDefinition(TypedDict, total=False):
    type: Required[Literal["function"]]
    function: Required[FunctionToolDefinition]

class FunctionToolDefinition(TypedDict, total=False):
    name: Required[str]
    description: str
    parameters: ToolParameters

class ToolParameters(TypedDict, total=False):
    type: Literal["object"]
    properties: Required[Dict[str, Any]]
    required: List[str]
```

It will respond with a list of tool calls structured as follows:

```
class ToolCall(AI21BaseModel):
    id: str
    function: ToolFunction
    type: Literal["function"] = "function"

class ToolFunction(AI21BaseModel):
    name: str
    arguments: str
```

This pull request incorporates the necessary modifications to integrate
this functionality into the ai21-langchain library.
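A hedged usage sketch, assuming ChatAI21 now follows LangChain's standard tool-calling interface (the model name and tool are illustrative, and an AI21 API key is assumed to be set in the environment):

```python
# Hedged usage sketch; the model name and tool are illustrative only.
from langchain_ai21 import ChatAI21
from langchain_core.tools import tool


@tool
def get_weather(city: str) -> str:
    """Return the current weather for a city."""
    return f"Sunny in {city}"


llm = ChatAI21(model="jamba-1.5-large").bind_tools([get_weather])
ai_msg = llm.invoke("What's the weather like in Shanghai?")
print(ai_msg.tool_calls)  # structured tool calls like the ones shown above
```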

---------

Co-authored-by: asafg <asafg@ai21.com>
Co-authored-by: pazshalev <111360591+pazshalev@users.noreply.github.com>
Co-authored-by: Paz Shalev <pazs@ai21.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-26 10:50:30 -07:00
maang-h
a566a15930 Fix MoonshotChat instantiate with alias (#25755)
- **Description:**
   -  Fix `MoonshotChat` instantiation with alias
   - Add `MoonshotChat` to `__init__.py`

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-26 17:33:22 +00:00
venkatram-dev
ec99f0d193 milvus: add_db_milvus_connection (#25627)
Thank you for contributing to LangChain!

- [x] **PR title**: "package: description"
- Where "package" is whichever of langchain, community, core,
experimental, etc. is being modified. Use "docs: ..." for purely docs
changes, "templates: ..." for template changes, "infra: ..." for CI
changes.
  - Example: "community: add foobar LLM"
  - "libs: langchain_milvus: add db name to milvus connection check"


- [x] **PR message**: ***Delete this entire checklist*** and replace
with
    - **Description:**  add db name to milvus connection check
    - **Issue:** https://github.com/langchain-ai/langchain/issues/25277



- [x] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-26 17:29:12 +00:00
Ashvin
af3b3a4474 Update endpoint for AzureMLEndpointApiType class. (#25725)
This addresses the issue mentioned in #25702

I have updated the endpoint used in validating the endpoint API type in
the AzureMLBaseEndpoint class from `/v1/completions` to `/completions`
and `/v1/chat/completions` to `/chat/completions`.

Co-authored-by: = <=>
2024-08-26 08:50:02 -04:00
Mohammad Mohtashim
dcf2278a05 [Community]: Added Template Format Parameter in create_chat_prompt for Langchain Prompty (#25739)
- **Description:** Added a `template_format` parameter to
`create_chat_prompt` to allow `.prompty` files to handle variables in
different template formats.
- **Issue:** #25703

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-26 12:48:21 +00:00
Dristy Srivastava
7205057c3e [Community][minor]: Added langchain_version while calling discover API (#24428)
- **Description:** Added the langchain version when calling the discover API
during both ingestion and retrieval
- **Issue:** NA
- **Dependencies:** NA
- **Tests:** NA
- **Docs** NA

---------

Co-authored-by: dristy.cd <dristy@clouddefense.io>
2024-08-26 08:47:48 -04:00
Dristy Srivastava
fbb4761199 [Community][minor]: Updating source path, and file path for SharePoint loader in PebbloSafeLoader (#25592)
- **Description:** Updating source path and file path in Pebblo safe
loader for SharePoint apps during loading
- **Issue:** NA
- **Dependencies:** NA
- **Tests:** NA
- **Docs** NA

---------

Co-authored-by: dristy.cd <dristy@clouddefense.io>
2024-08-26 08:38:40 -04:00
Rajendra Kadam
745d1c2b8d community[minor]: [Pebblo] Fix URL construction in newer Python versions (#25747)
- **PR message**: **Fix URL construction in newer Python versions**
- **Description:** 
- Update the URL construction logic to use the .value attribute for
Routes enum members (a minimal sketch of this pattern follows after this list).
- This adjustment resolves an issue where the code worked correctly in
Python 3.9 but failed in Python 3.11.
  - Clean up unused routes.
- **Issue:** NA
- **Dependencies:** NA
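A minimal sketch of the pattern behind this fix (the route name and base URL are illustrative, not the actual Pebblo values):

```python
# Minimal sketch: on Python 3.11+, formatting a (str, Enum) member in an f-string
# yields "Routes.loader_doc" rather than its value, so the URL must be built from
# the explicit .value. The route and base URL below are illustrative.
from enum import Enum


class Routes(str, Enum):
    loader_doc = "/v1/loader/doc"


BASE_URL = "http://localhost:8000"

broken = f"{BASE_URL}{Routes.loader_doc}"       # ".../Routes.loader_doc" on Python 3.11+
fixed = f"{BASE_URL}{Routes.loader_doc.value}"  # ".../v1/loader/doc" on every version
print(broken, fixed)
```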
2024-08-26 07:27:30 -04:00
Rajendra Kadam
58a98c7d8a community: [PebbloRetrievalQA] Implemented Async support for prompt APIs (#25748)
- **Description:** PebbloRetrievalQA: Implemented Async support for
prompt APIs (classification and governance)
- **Issue:** NA
- **Dependencies:** NA
2024-08-26 07:27:05 -04:00
Tomaz Bratanic
6703d795c5 Handle Ollama tool raw schema in llmgraphtransformer (#25752) 2024-08-26 07:26:26 -04:00
Bagatur
30f1bf24ac core[patch]: Release 0.2.35 (#25729) 2024-08-25 16:49:27 -07:00
Christophe Bornet
038c287b3a all: Improve make lint command (#25344)
* Removed `ruff check --select I` as `I` is already selected and checked
in the main `ruff check` command
* Added checks for non-empty `PYTHON_FILES`
* Run `ruff check` only on `PYTHON_FILES`

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-23 18:23:52 -07:00
Mohammad Mohtashim
9a29398fe6 huggingface: fix model param population (#24743)
- **Description:** Fix the validation error for `endpoint_url` for
HuggingFaceEndpoint. I have given a detailed description of the issue in
the issue that I have created (a usage sketch follows below).
- **Issue:** #24742
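A hedged usage sketch of the scenario this fix covers (the endpoint URL is illustrative, and any required Hugging Face token is assumed to be available in the environment):

```python
# Hedged usage sketch: instantiate HuggingFaceEndpoint with an explicit
# endpoint_url instead of a repo_id. The URL below is illustrative.
from langchain_huggingface import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    endpoint_url="https://my-tgi-host.example.com",
    task="text-generation",
    max_new_tokens=128,
)
```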

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-24 00:45:28 +00:00
Yuki Watanabe
c7a8af2e75 databricks: add vector search and embeddings (#25648)
### Summary

Add `DatabricksVectorSearch` and `DatabricksEmbeddings` classes to the
`langchain-databricks` partner packages. Core functionality is
unchanged, but the vector search class is largely refactored for
readability and maintainability.

This PR does not add integration tests yet. This will be added once the
Databricks test workspace is ready.

Tagging @efriis as POC


### Tracker
[ ] Create a package and migrate ChatDatabricks
[✍️] Migrate DatabricksVectorSearch, DatabricksEmbeddings, and their docs
~[ ] Migrate UCFunctionToolkit and its doc~
[ ] Add provider document and update README.md
[ ] Add integration tests and set up secrets (after moving to an external package)
[ ] Add deprecation note to the community implementations.

---------

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-24 00:40:21 +00:00
Erick Friis
71c039571a docs: remove deprecated nemo embed docs (#25720) 2024-08-24 00:36:33 +00:00
Hyman
58e72febeb openai:compatible with other llm usage meta data (#24500)
- [ ] **PR message**:
- **Description:** Compatible with usage metadata from other LLMs (e.g.
deepseek-chat, glm-4)
    - **Issue:** N/A
    - **Dependencies:** no new dependencies added


- [ ] **Add tests and docs**: 
libs/partners/openai/tests/unit_tests/chat_models/test_base.py
```shell
cd libs/partners/openai
poetry run pytest tests/unit_tests/chat_models/test_base.py::test_openai_astream
poetry run pytest tests/unit_tests/chat_models/test_base.py::test_openai_stream
poetry run pytest tests/unit_tests/chat_models/test_base.py::test_deepseek_astream
poetry run pytest tests/unit_tests/chat_models/test_base.py::test_deepseek_stream
poetry run pytest tests/unit_tests/chat_models/test_base.py::test_glm4_astream
poetry run pytest tests/unit_tests/chat_models/test_base.py::test_glm4_stream
```

---------

Co-authored-by: hyman <hyman@xiaozancloud.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-23 16:59:14 -07:00
Erick Friis
3dc7d447aa infra: reenable min version testing 2, ci ignore ai21 (#25709) 2024-08-23 23:28:42 +00:00
Erick Friis
f6491ceb7d community: remove integration test deps (#24460)
they aren't used
2024-08-23 23:25:17 +00:00
Erick Friis
0022ae1b31 docs: remove templates (#25717)
- [x] check redirect works at template root
- [x] check redirect works within individual template page
2024-08-23 15:51:12 -07:00
Sharmistha S. Gupta
90439b12f6 Added support for Nebula Chat model (#21925)
Description: Added support for Nebula Chat model in addition to Nebula
Instruct
Dependencies: N/A
Twitter handle: @Symbldotai

---------

Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-08-23 22:34:32 +00:00
James Espichan Vilca
080741d336 core[patch]: Fix type for inner input in base prompts (#25713)
Thank you for contributing to LangChain!

- [ ] **PR title**: "langchain-core: Fix type"
- The file to modify is located in
/libs/core/langchain_core/prompts/base.py


- [ ] **PR message**: 
- **Description:** The change updates the type of the inner input variable
from dict to Any. This change is required since the _validate_input method
expects a type that is not only a dictionary.
    - **Dependencies:** There are no dependencies for this change


- [ ] **Add tests and docs**: 
1. A test is not needed. This error occurs because I overrode a portion
of the _validate_input method, which caused 'beartype' to raise an
error.
2024-08-23 14:06:39 -07:00
John
5ce9a716a7 docs, langchain-unstructured: update langchain-unstructured docs and update ustructured-client dependency (#25451)
Be more explicit in the docs about creating an instance of the
UnstructuredClient if you want to customize it versus using sdk
parameters with the UnstructuredLoader.

Bump the unstructured-client dependency as discussed
[here](https://github.com/langchain-ai/langchain/discussions/25328#discussioncomment-10350949)

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-08-23 20:36:41 +00:00
Scott Hurrey
92abf62292 box[patch]: fix bugs in docs (#25699) 2024-08-23 19:36:23 +00:00
Ian
64ace25eb8 <Community>: tidb vector support vector index (#19984)
This PR introduces adjustments to ensure compatibility with the recently
released preview version of [TiDB Serverless Vector
Search](https://tidb.cloud/ai), aiming to prevent user confusion.

- TiDB Vector now supports vector indexing with cosine and l2 distance
strategies, although inner_product remains unsupported.
- Changing the distance strategy is currently not supported, so the test
cases should be adjusted.
2024-08-23 13:59:23 -04:00
Austin Burdette
f355a98bb6 community:yuan2[patch]: standardize init args (#21462)
Updated stop and request_timeout so they are aliased to stop_sequences and
timeout, respectively. Added a test that both continue to set the same
underlying attributes.
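A hedged sketch of this aliasing pattern (field and alias names follow the description above; this is not the actual yuan2 source):

```python
# Hedged sketch of init-arg aliasing, not the actual ChatYuan2 source.
from typing import List, Optional

from pydantic import BaseModel, ConfigDict, Field


class Yuan2Sketch(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    stop_sequences: Optional[List[str]] = Field(default=None, alias="stop")
    timeout: int = Field(default=60, alias="request_timeout")


# The alias and the canonical name both set the same underlying attribute.
assert Yuan2Sketch(stop=["\n"]).stop_sequences == ["\n"]
assert Yuan2Sketch(request_timeout=30).timeout == Yuan2Sketch(timeout=30).timeout == 30
```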

Related to
[20085](https://github.com/langchain-ai/langchain/issues/20085)

Co-authored-by: ccurme <chester.curme@gmail.com>
2024-08-23 17:56:19 +00:00
Erick Friis
979505eb03 x 2024-08-08 09:21:33 -07:00
Erick Friis
1882a139b7 x 2024-08-08 09:18:51 -07:00
Eugene Yurtsev
627757e808 Bump pydantic 2024-08-08 11:56:17 -04:00
Eugene Yurtsev
06da7da547 Apply pydantic v0.2 changes 2024-08-08 11:55:06 -04:00
Eugene Yurtsev
3d561c3e6d model_validate_json 2024-08-08 11:34:23 -04:00
Eugene Yurtsev
09f9d3e972 get_fields 2024-08-08 11:31:57 -04:00
Eugene Yurtsev
c1e6e7d020 get_fields 2024-08-08 11:31:54 -04:00
Eugene Yurtsev
6f79443ab5 get_fields 2024-08-08 11:31:41 -04:00
Eugene Yurtsev
58c4e1ef86 Add name to Runnable Generator 2024-08-08 11:20:08 -04:00
Eugene Yurtsev
de30b04f37 Fix type issue 2024-08-08 11:12:31 -04:00
Eugene Yurtsev
f7a455299e Add more types 2024-08-07 22:12:29 -04:00
Eugene Yurtsev
76043abd47 add unit test 2024-08-07 17:23:54 -04:00
Eugene Yurtsev
61cdb9ccce Fix output schema 2024-08-07 17:23:44 -04:00
Eugene Yurtsev
1ef3fa54fc Fix types 2024-08-07 17:16:28 -04:00
Eugene Yurtsev
3856e3b02a Fix types 2024-08-07 17:14:59 -04:00
Eugene Yurtsev
035f09f20d Fix types 2024-08-07 17:14:47 -04:00
Eugene Yurtsev
63f7a5ab68 Replace __fields__ with model_fields 2024-08-07 17:11:14 -04:00
Eugene Yurtsev
8447d9f6f1 Replace __fields__ with model_fields 2024-08-07 17:10:07 -04:00
Eugene Yurtsev
95db9e9258 Replace __fields__ with model_fields 2024-08-07 17:07:15 -04:00
Eugene Yurtsev
0d1b93774b Resolve linting 2024-08-07 17:06:46 -04:00
Eugene Yurtsev
bcce3a2865 Resolve linting 2024-08-07 17:03:30 -04:00
Eugene Yurtsev
4a478d82bd Resolve linting 2024-08-07 17:02:15 -04:00
Eugene Yurtsev
a0c3657442 Resolve linting 2024-08-07 17:01:29 -04:00
Eugene Yurtsev
72c5c28b4d Resolve linting 2024-08-07 17:01:02 -04:00
Eugene Yurtsev
fe6f2f724b Use model_fields 2024-08-07 16:08:30 -04:00
Eugene Yurtsev
88d347e90c Remove pydantic lint in core 2024-08-07 16:06:40 -04:00
Eugene Yurtsev
741b50d4fd Fix serializer 2024-08-07 15:57:57 -04:00
Eugene Yurtsev
24c6825345 Fix serialization test 2024-08-07 15:57:41 -04:00
Eugene Yurtsev
32824aa55c Handle lint 2024-08-07 15:47:56 -04:00
Eugene Yurtsev
f6924653ea Handle lint 2024-08-07 15:47:51 -04:00
Eugene Yurtsev
66e8594b89 Handle lint 2024-08-07 15:46:37 -04:00
Eugene Yurtsev
3b9f061eac Handle lint 2024-08-07 15:45:21 -04:00
Eugene Yurtsev
76b6ee290d Replace __fields__ with model_fields 2024-08-07 15:44:31 -04:00
Eugene Yurtsev
22957311fe Add more tests for serializable 2024-08-07 15:40:59 -04:00
Eugene Yurtsev
f9df75c8cc Add more tests for serializable 2024-08-07 15:37:21 -04:00
Eugene Yurtsev
ece0ab8539 Add more tests for serializable 2024-08-07 15:18:16 -04:00
Eugene Yurtsev
4ddd9e5f23 lint 2024-08-07 15:05:52 -04:00
Eugene Yurtsev
f8e95e5735 lint 2024-08-07 15:04:02 -04:00
Eugene Yurtsev
6515b2f77b Linting fixes 2024-08-07 15:03:31 -04:00
Eugene Yurtsev
63fde4f095 Linting fixes 2024-08-07 13:59:22 -04:00
Eugene Yurtsev
d9bb9125c1 Linting fixes 2024-08-07 13:55:56 -04:00
Eugene Yurtsev
384d9f59a3 Linting fixes 2024-08-07 13:55:38 -04:00
Eugene Yurtsev
fc0fa7e8f0 Add missing import 2024-08-07 13:50:14 -04:00
Eugene Yurtsev
a1054d06ca Add missing import 2024-08-07 13:48:43 -04:00
Eugene Yurtsev
c2570a7a7c lint 2024-08-07 13:47:43 -04:00
Eugene Yurtsev
97f4128bfd Add missing imports 2024-08-07 13:47:26 -04:00
Eugene Yurtsev
2434dc8f92 update snapshots 2024-08-07 13:46:18 -04:00
Eugene Yurtsev
123d61a888 Add missing imports 2024-08-07 13:43:44 -04:00
Eugene Yurtsev
53f6f4a0c0 Mark explicitly with # pydantic: ignore 2024-08-07 13:41:17 -04:00
Eugene Yurtsev
550bef230a Merge branch 'master' into eugene/merge_pydantic_3_changes 2024-08-07 13:28:46 -04:00
Eugene Yurtsev
5a998d36b2 Convert to v1 model for now 2024-08-07 12:09:42 -04:00
Eugene Yurtsev
72cd199efc Fix create_subset_model_v1 2024-08-07 11:58:10 -04:00
Eugene Yurtsev
a1d993deb1 Remove deprecated comment 2024-08-07 11:54:21 -04:00
Eugene Yurtsev
e546e21d53 Update unit test for pydantic 2 2024-08-07 11:52:28 -04:00
Eugene Yurtsev
26d6426156 Fix extra space in repr 2024-08-07 11:48:11 -04:00
Eugene Yurtsev
8dffedebd6 Add Skip Validation() 2024-08-07 11:38:28 -04:00
Eugene Yurtsev
60adf8d6e4 Handle is_injected_arg_type 2024-08-07 11:36:56 -04:00
Eugene Yurtsev
d13a1ad5f5 Use _AnyIDDocument 2024-08-07 11:27:35 -04:00
Eugene Yurtsev
1e5f8a494a Add SkipValidation() 2024-08-07 11:25:21 -04:00
Eugene Yurtsev
5216131769 Fixed something? 2024-08-07 11:18:52 -04:00
Eugene Yurtsev
8bdaf858b8 Use is_basemodel_instance 2024-08-07 11:03:53 -04:00
Eugene Yurtsev
c37a0ca672 Use is_basemodel_subclass 2024-08-07 11:03:35 -04:00
Eugene Yurtsev
266cd15511 ADd Skip Validation 2024-08-07 10:51:43 -04:00
Eugene Yurtsev
9debf8144e ADd Skip Validation 2024-08-07 10:51:02 -04:00
Eugene Yurtsev
78ce0ed337 Fix broken type 2024-08-07 10:23:55 -04:00
Eugene Yurtsev
4aa1932bea update 2024-08-07 09:52:33 -04:00
Eugene Yurtsev
b658295b97 update 2024-08-07 09:40:29 -04:00
Eugene Yurtsev
8c59b6a026 Merge fix 2024-08-07 09:32:25 -04:00
Eugene Yurtsev
e35b43a7a7 Fix ConfigDict to be populate by name 2024-08-07 09:16:23 -04:00
Eugene Yurtsev
7288d914a8 Add missing model rebuild and optional 2024-08-07 09:14:06 -04:00
Eugene Yurtsev
1b487e261a add missing pydantic import 2024-08-07 09:04:36 -04:00
Eugene Yurtsev
3934663db9 Merge branch 'master' into eugene/merge_pydantic_3_changes 2024-08-07 08:59:28 -04:00
Eugene Yurtsev
fb639cb49c lint 2024-08-06 22:02:31 -04:00
Eugene Yurtsev
1856387e9e Add missing imports load and dumpd 2024-08-06 17:10:42 -04:00
Eugene Yurtsev
a5ad775a90 Add Optional import 2024-08-06 17:10:18 -04:00
Eugene Yurtsev
a321401683 Update pydantic utility 2024-08-06 16:54:55 -04:00
Eugene Yurtsev
8839220a00 Restore more missing stuff 2024-08-06 16:10:59 -04:00
Eugene Yurtsev
e6b2ca4da3 x 2024-08-06 16:08:06 -04:00
Eugene Yurtsev
d0c52d1dec x 2024-08-06 16:06:44 -04:00
Eugene Yurtsev
a5fa6d1c43 x 2024-08-06 16:05:43 -04:00
Eugene Yurtsev
7f79bd6e04 x 2024-08-06 16:04:14 -04:00
Eugene Yurtsev
339985e39e merge more 2024-08-06 15:59:53 -04:00
Eugene Yurtsev
f4ecd749d5 x 2024-08-06 15:58:55 -04:00
Eugene Yurtsev
cb61c6b4bf Merge branch 'master' into eugene/merge_pydantic_3_changes 2024-08-06 15:57:37 -04:00
Eugene Yurtsev
b42c2c6cd6 Update to master 2024-08-06 15:57:35 -04:00
Eugene Yurtsev
da6633bf0d update 2024-08-06 13:08:53 -04:00
Eugene Yurtsev
0193d18bec update 2024-08-06 13:04:17 -04:00
Eugene Yurtsev
0a82192e36 update forward refs 2024-08-06 12:41:52 -04:00
Eugene Yurtsev
202f6fef95 update 2024-08-06 12:39:00 -04:00
Eugene Yurtsev
c49416e908 fix typo 2024-08-06 12:35:05 -04:00
Eugene Yurtsev
ec93ea6240 update 2024-08-06 12:33:43 -04:00
Eugene Yurtsev
add20dc9a8 update 2024-08-06 12:30:33 -04:00
Eugene Yurtsev
7799474746 MANUAL: May need to revert 2024-08-06 11:47:27 -04:00
Eugene Yurtsev
d98c1f115f update 2024-08-06 11:46:39 -04:00
Eugene Yurtsev
d97f70def4 Update 2024-08-06 11:43:25 -04:00
Eugene Yurtsev
609c6b0963 Update 2024-08-06 11:40:43 -04:00
427 changed files with 20648 additions and 24671 deletions

View File

@@ -16,6 +16,18 @@ LANGCHAIN_DIRS = [
"libs/experimental",
]
# ignored partners are removed from dependents
# but still run if directly edited
IGNORED_PARTNERS = [
# remove huggingface from dependents because of CI instability
# specifically in huggingface jobs
# https://github.com/langchain-ai/langchain/issues/25558
"huggingface",
# remove ai21 because of breaking changes in sdk version 2.14.0
# that have not been fixed yet
"ai21",
]
def all_package_dirs() -> Set[str]:
return {
@@ -69,12 +81,10 @@ def dependents_graph() -> dict:
if "langchain" in dep:
dependents[dep].add(pkg_dir)
# remove huggingface from dependents because of CI instability
# specifically in huggingface jobs
# https://github.com/langchain-ai/langchain/issues/25558
for k in dependents:
if "libs/partners/huggingface" in dependents[k]:
dependents[k].remove("libs/partners/huggingface")
for partner in IGNORED_PARTNERS:
if f"libs/partners/{partner}" in dependents[k]:
dependents[k].remove(f"libs/partners/{partner}")
return dependents

View File

@@ -75,12 +75,11 @@ jobs:
echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
echo "min-versions=$min_versions"
# Temporarily disabled until we can get the minimum versions working
# - name: Run unit tests with minimum dependency versions
# if: ${{ steps.min-version.outputs.min-versions != '' }}
# env:
# MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
# run: |
# poetry run pip install --force-reinstall $MIN_VERSIONS --editable .
# make tests
# working-directory: ${{ inputs.working-directory }}
- name: Run unit tests with minimum dependency versions
if: ${{ steps.min-version.outputs.min-versions != '' }}
env:
MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
run: |
poetry run pip install --force-reinstall $MIN_VERSIONS --editable .
make tests
working-directory: ${{ inputs.working-directory }}

View File

@@ -43,7 +43,6 @@ jobs:
name: cd ${{ matrix.job-configs.working-directory }}
needs: [ build ]
if: ${{ needs.build.outputs.lint != '[]' }}
continue-on-error: true
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.lint) }}
@@ -57,7 +56,6 @@ jobs:
name: cd ${{ matrix.job-configs.working-directory }}
needs: [ build ]
if: ${{ needs.build.outputs.test != '[]' }}
continue-on-error: true
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.test) }}
@@ -82,7 +80,6 @@ jobs:
name: cd ${{ matrix.job-configs.working-directory }}
needs: [ build ]
if: ${{ needs.build.outputs.compile-integration-tests != '[]' }}
continue-on-error: true
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.compile-integration-tests) }}
@@ -96,7 +93,6 @@ jobs:
name: cd ${{ matrix.job-configs.working-directory }}
needs: [ build ]
if: ${{ needs.build.outputs.dependencies != '[]' }}
continue-on-error: true
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.dependencies) }}
@@ -110,7 +106,6 @@ jobs:
name: "cd ${{ matrix.job-configs.working-directory }} / make extended_tests #${{ matrix.job-configs.python-version }}"
needs: [ build ]
if: ${{ needs.build.outputs.extended-tests != '[]' }}
continue-on-error: true
strategy:
matrix:
# note different variable for extended test dirs

View File

@@ -22,11 +22,9 @@ jobs:
working-directory:
- "libs/partners/openai"
- "libs/partners/anthropic"
- "libs/partners/ai21"
- "libs/partners/fireworks"
- "libs/partners/groq"
- "libs/partners/mistralai"
- "libs/partners/together"
- "libs/partners/google-vertexai"
- "libs/partners/google-genai"
- "libs/partners/aws"
@@ -90,11 +88,9 @@ jobs:
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}

View File

@@ -4,6 +4,8 @@ Example code for building applications with LangChain, with an emphasis on more
Notebook | Description
:- | :-
[agent_fireworks_ai_langchain_mongodb.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/agent_fireworks_ai_langchain_mongodb.ipynb) | Build an AI Agent With Memory Using MongoDB, LangChain and FireWorksAI.
[mongodb-langchain-cache-memory.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/mongodb-langchain-cache-memory.ipynb) | Build a RAG Application with Semantic Cache Using MongoDB and LangChain.
[LLaMA2_sql_chat.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/LLaMA2_sql_chat.ipynb) | Build a chat application that interacts with a SQL database using an open source llm (llama2), specifically demonstrated on an SQLite database containing rosters.
[Semi_Structured_RAG.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_Structured_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data, including text and tables, using unstructured for parsing, multi-vector retriever for storing, and lcel for implementing chains.
[Semi_structured_and_multi_moda...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using unstructured for parsing, multi-vector retriever for storage and retrieval, and lcel for implementing chains.

File diff suppressed because one or more lines are too long

View File

@@ -38,7 +38,7 @@
"source": [
"Connection is via `cassio` using `auto=True` parameter, and the notebook uses OpenAI. You should create a `.env` file accordingly.\n",
"\n",
"For Casssandra, set:\n",
"For Cassandra, set:\n",
"```bash\n",
"CASSANDRA_CONTACT_POINTS\n",
"CASSANDRA_USERNAME\n",

View File

@@ -39,7 +39,6 @@ install-py-deps:
generate-files:
mkdir -p $(INTERMEDIATE_DIR)
cp -r $(SOURCE_DIR)/* $(INTERMEDIATE_DIR)
mkdir -p $(INTERMEDIATE_DIR)/templates
$(PYTHON) scripts/tool_feat_table.py $(INTERMEDIATE_DIR)
@@ -47,8 +46,6 @@ generate-files:
$(PYTHON) scripts/partner_pkg_table.py $(INTERMEDIATE_DIR)
$(PYTHON) scripts/copy_templates.py $(INTERMEDIATE_DIR)
wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O $(INTERMEDIATE_DIR)/langserve.md
$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langserve.md https://github.com/langchain-ai/langserve/tree/main/
@@ -76,6 +73,8 @@ append-related:
generate-references:
$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(OUTPUT_NEW_DOCS_DIR)
update-md: generate-files md-sync
build: install-py-deps generate-files copy-infra render md-sync append-related
vercel-build: install-vercel-deps build generate-references

File diff suppressed because it is too large

View File

@@ -5,51 +5,89 @@ This page contains `arXiv` papers referenced in the LangChain Documentation, API
Templates, and Cookbooks.
From the opposite direction, scientists use `LangChain` in research and reference it in the research papers.
Here you find papers that reference:
- [LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header)
- [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header)
- [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
`arXiv` papers with references to:
[LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header) | [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header) | [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
## Summary
| arXiv id / Title | Authors | Published date 🔻 | LangChain Documentation|
|------------------|---------|-------------------|------------------------|
| `2402.03620v1` [Self-Discover: Large Language Models Self-Compose Reasoning Structures](http://arxiv.org/abs/2402.03620v1) | Pei Zhou, Jay Pujara, Xiang Ren, et al. | 2024-02-06 | `Cookbook:` [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
| `2401.18059v1` [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](http://arxiv.org/abs/2401.18059v1) | Parth Sarthi, Salman Abdullah, Aditi Tuli, et al. | 2024-01-31 | `Cookbook:` [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
| `2401.15884v2` [Corrective Retrieval Augmented Generation](http://arxiv.org/abs/2401.15884v2) | Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al. | 2024-01-29 | `Cookbook:` [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
| `2401.04088v1` [Mixtral of Experts](http://arxiv.org/abs/2401.04088v1) | Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al. | 2024-01-08 | `Cookbook:` [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
| `2312.06648v2` [Dense X Retrieval: What Retrieval Granularity Should We Use?](http://arxiv.org/abs/2312.06648v2) | Tong Chen, Hongwei Wang, Sihao Chen, et al. | 2023-12-11 | `Template:` [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
| `2311.09210v1` [Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models](http://arxiv.org/abs/2311.09210v1) | Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al. | 2023-11-15 | `Template:` [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
| `2310.11511v1` [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](http://arxiv.org/abs/2310.11511v1) | Akari Asai, Zeqiu Wu, Yizhong Wang, et al. | 2023-10-17 | `Cookbook:` [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
| `2310.06117v2` [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](http://arxiv.org/abs/2310.06117v2) | Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al. | 2023-10-09 | `Template:` [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting), `Cookbook:` [stepback-qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
| `2307.09288v2` [Llama 2: Open Foundation and Fine-Tuned Chat Models](http://arxiv.org/abs/2307.09288v2) | Hugo Touvron, Louis Martin, Kevin Stone, et al. | 2023-07-18 | `Cookbook:` [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
| `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023-05-23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023-05-15 | `API:` [langchain_experimental.tot](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.tot), `Cookbook:` [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
| `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023-05-06 | `Cookbook:` [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al. | 2023-05-03 | `API:` [langchain...LLMListwiseRerank](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
| `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023-04-17 | `Cookbook:` [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb), [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb)
| `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023-04-07 | `Cookbook:` [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb), [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb)
| `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023-03-31 | `Cookbook:` [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023-03-30 | `API:` [langchain_experimental.autonomous_agents](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023-01-24 | `API:` [langchain_community...OCIModelDeploymentTGI](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/langchain_community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
| `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin, et al. | 2022-12-20 | `API:` [langchain...HypotheticalDocumentEmbedder](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [hypothetical_document_embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al. | 2022-12-12 | `API:` [langchain_experimental.fallacy_removal](https://python.langchain.com/v0.2/api_reference//arxiv/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
| `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al. | 2022-11-25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022-11-18 | `API:` [langchain_experimental.pal_chain](https://python.langchain.com/v0.2/api_reference//python/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://python.langchain.com/v0.2/api_reference/experimental/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022-10-06 | `Docs:` [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), `API:` [langchain...TrajectoryEvalChain](https://python.langchain.com/v0.2/api_reference/langchain/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain), [langchain...create_react_agent](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent)
| `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al. | 2022-09-22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
| `2205.13147v4` [Matryoshka Representation Learning](http://arxiv.org/abs/2205.13147v4) | Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al. | 2022-05-26 | `Docs:` [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
| `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022-05-25 | `API:` [langchain_community...LaserEmbeddings](https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022-03-15 | `API:` [langchain_community...SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022-02-01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021-02-26 | `API:` [langchain_experimental.open_clip](https://python.langchain.com/v0.2/api_reference//arxiv/experimental_api_reference.html#module-langchain_experimental.open_clip)
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019-09-11 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
| `2403.14403v2` [Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity](http://arxiv.org/abs/2403.14403v2) | Soyeong Jeong, Jinheon Baek, Sukmin Cho, et al. | 2024&#8209;03&#8209;21 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2402.03620v1` [Self-Discover: Large Language Models Self-Compose Reasoning Structures](http://arxiv.org/abs/2402.03620v1) | Pei Zhou, Jay Pujara, Xiang Ren, et al. | 2024&#8209;02&#8209;06 | `Cookbook:` [Self-Discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
| `2402.03367v2` [RAG-Fusion: a New Take on Retrieval-Augmented Generation](http://arxiv.org/abs/2402.03367v2) | Zackary Rackauckas | 2024&#8209;01&#8209;31 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2401.18059v1` [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](http://arxiv.org/abs/2401.18059v1) | Parth Sarthi, Salman Abdullah, Aditi Tuli, et al. | 2024&#8209;01&#8209;31 | `Cookbook:` [Raptor](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
| `2401.15884v2` [Corrective Retrieval Augmented Generation](http://arxiv.org/abs/2401.15884v2) | Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al. | 2024&#8209;01&#8209;29 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `Cookbook:` [Langgraph Crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
| `2401.08500v1` [Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering](http://arxiv.org/abs/2401.08500v1) | Tal Ridnik, Dedy Kredo, Itamar Friedman | 2024&#8209;01&#8209;16 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2401.04088v1` [Mixtral of Experts](http://arxiv.org/abs/2401.04088v1) | Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al. | 2024&#8209;01&#8209;08 | `Cookbook:` [Together Ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
| `2312.06648v2` [Dense X Retrieval: What Retrieval Granularity Should We Use?](http://arxiv.org/abs/2312.06648v2) | Tong Chen, Hongwei Wang, Sihao Chen, et al. | 2023&#8209;12&#8209;11 | `Template:` [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
| `2311.09210v1` [Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models](http://arxiv.org/abs/2311.09210v1) | Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al. | 2023&#8209;11&#8209;15 | `Template:` [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
| `2310.11511v1` [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](http://arxiv.org/abs/2310.11511v1) | Akari Asai, Zeqiu Wu, Yizhong Wang, et al. | 2023&#8209;10&#8209;17 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `Cookbook:` [Langgraph Self Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
| `2310.06117v2` [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](http://arxiv.org/abs/2310.06117v2) | Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al. | 2023&#8209;10&#8209;09 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `Template:` [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting), `Cookbook:` [Stepback-Qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
| `2307.15337v3` [Skeleton-of-Thought: Prompting LLMs for Efficient Parallel Generation](http://arxiv.org/abs/2307.15337v3) | Xuefei Ning, Zinan Lin, Zixuan Zhou, et al. | 2023&#8209;07&#8209;28 | `Template:` [skeleton-of-thought](https://python.langchain.com/docs/templates/skeleton-of-thought)
| `2307.09288v2` [Llama 2: Open Foundation and Fine-Tuned Chat Models](http://arxiv.org/abs/2307.09288v2) | Hugo Touvron, Louis Martin, Kevin Stone, et al. | 2023&#8209;07&#8209;18 | `Cookbook:` [Semi Structured Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
| `2307.03172v3` [Lost in the Middle: How Language Models Use Long Contexts](http://arxiv.org/abs/2307.03172v3) | Nelson F. Liu, Kevin Lin, John Hewitt, et al. | 2023&#8209;07&#8209;06 | `Docs:` [docs/how_to/long_context_reorder](https://python.langchain.com/v0.2/docs/how_to/long_context_reorder)
| `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023&#8209;05&#8209;23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [Rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023&#8209;05&#8209;15 | `API:` [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot), `Cookbook:` [Tree Of Thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
| `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023&#8209;05&#8209;06 | `Cookbook:` [Plan And Execute Agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al. | 2023&#8209;05&#8209;03 | `Docs:` [docs/how_to/contextual_compression](https://python.langchain.com/v0.2/docs/how_to/contextual_compression), `API:` [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
| `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023&#8209;04&#8209;17 | `Cookbook:` [Semi Structured Multi Modal Rag Llama2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb), [Semi Structured And Multi Modal Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb)
| `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023&#8209;04&#8209;07 | `Cookbook:` [Generative Agents Interactive Simulacra Of Human Behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb), [Multiagent Bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb)
| `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023&#8209;03&#8209;31 | `Cookbook:` [Camel Role Playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023&#8209;03&#8209;30 | `API:` [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [Hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023&#8209;01&#8209;24 | `API:` [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
| `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin, et al. | 2022&#8209;12&#8209;20 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `API:` [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [Hypothetical Document Embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
| `2212.08073v1` [Constitutional AI: Harmlessness from AI Feedback](http://arxiv.org/abs/2212.08073v1) | Yuntao Bai, Saurav Kadavath, Sandipan Kundu, et al. | 2022&#8209;12&#8209;15 | `Docs:` [docs/versions/migrating_chains/constitutional_chain](https://python.langchain.com/v0.2/docs/versions/migrating_chains/constitutional_chain)
| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al. | 2022&#8209;12&#8209;12 | `API:` [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
| `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al. | 2022&#8209;11&#8209;25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022&#8209;11&#8209;18 | `API:` [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [Program Aided Language Model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
| `2210.11934v2` [An Analysis of Fusion Functions for Hybrid Retrieval](http://arxiv.org/abs/2210.11934v2) | Sebastian Bruch, Siyu Gai, Amir Ingber | 2022&#8209;10&#8209;21 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022&#8209;10&#8209;06 | `Docs:` [docs/integrations/tools/ionic_shopping](https://python.langchain.com/v0.2/docs/integrations/tools/ionic_shopping), [docs/integrations/providers/cohere](https://python.langchain.com/v0.2/docs/integrations/providers/cohere), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `API:` [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
| `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al. | 2022&#8209;09&#8209;22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/v0.2/docs/integrations/providers/activeloop_deeplake)
| `2205.13147v4` [Matryoshka Representation Learning](http://arxiv.org/abs/2205.13147v4) | Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al. | 2022&#8209;05&#8209;26 | `Docs:` [docs/integrations/providers/snowflake](https://python.langchain.com/v0.2/docs/integrations/providers/snowflake)
| `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022&#8209;05&#8209;25 | `API:` [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022&#8209;03&#8209;15 | `Docs:` [docs/tutorials/sql_qa](https://python.langchain.com/v0.2/docs/tutorials/sql_qa), `API:` [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022&#8209;02&#8209;01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
| `2112.01488v3` [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](http://arxiv.org/abs/2112.01488v3) | Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al. | 2021&#8209;12&#8209;02 | `Docs:` [docs/integrations/retrievers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/v0.2/docs/integrations/providers/dspy)
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021&#8209;02&#8209;26 | `API:` [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
| `2005.14165v4` [Language Models are Few-Shot Learners](http://arxiv.org/abs/2005.14165v4) | Tom B. Brown, Benjamin Mann, Nick Ryder, et al. | 2020&#8209;05&#8209;28 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2005.11401v4` [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](http://arxiv.org/abs/2005.11401v4) | Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al. | 2020&#8209;05&#8209;22 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019&#8209;09&#8209;11 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
## Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity
- **Authors:** Soyeong Jeong, Jinheon Baek, Sukmin Cho, et al.
- **arXiv id:** [2403.14403v2](http://arxiv.org/abs/2403.14403v2) **Published Date:** 2024-03-21
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Retrieval-Augmented Large Language Models (LLMs), which incorporate the
non-parametric knowledge from external knowledge bases into LLMs, have emerged
as a promising approach to enhancing response accuracy in several tasks, such
as Question-Answering (QA). However, even though there are various approaches
dealing with queries of different complexities, they either handle simple
queries with unnecessary computational overhead or fail to adequately address
complex multi-step queries; yet, not all user requests fall into only one of
the simple or complex categories. In this work, we propose a novel adaptive QA
framework, that can dynamically select the most suitable strategy for
(retrieval-augmented) LLMs from the simplest to the most sophisticated ones
based on the query complexity. Also, this selection process is operationalized
with a classifier, which is a smaller LM trained to predict the complexity
level of incoming queries with automatically collected labels, obtained from
actual predicted outcomes of models and inherent inductive biases in datasets.
This approach offers a balanced strategy, seamlessly adapting between the
iterative and single-step retrieval-augmented LLMs, as well as the no-retrieval
methods, in response to a range of query complexities. We validate our model on
a set of open-domain QA datasets, covering multiple query complexities, and
show that ours enhances the overall efficiency and accuracy of QA systems,
compared to relevant baselines including the adaptive retrieval approaches.
Code is available at: https://github.com/starsuzi/Adaptive-RAG.
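The adaptive routing idea described above can be approximated with LCEL; the following is a minimal sketch (not the paper's implementation), assuming `llm`, `no_retrieval_chain`, `single_step_rag_chain`, and `iterative_rag_chain` are defined elsewhere.

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableBranch

# Lightweight complexity classifier (label A/B/C), as in the adaptive framework above.
classifier = (
    ChatPromptTemplate.from_template(
        "Classify the question as A (answerable without retrieval), "
        "B (single-step retrieval) or C (multi-step retrieval).\n"
        "Question: {question}\nLabel:"
    )
    | llm
    | StrOutputParser()
)

# Route to the cheapest strategy that fits the predicted complexity.
router = RunnableBranch(
    (lambda x: "A" in x["label"], no_retrieval_chain),
    (lambda x: "B" in x["label"], single_step_rag_chain),
    iterative_rag_chain,  # default: treat the query as complex
)

adaptive_qa = {"label": classifier, "question": lambda x: x["question"]} | router
# adaptive_qa.invoke({"question": "Who wrote the paper that introduced RRF?"})
```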
## Self-Discover: Large Language Models Self-Compose Reasoning Structures
- **arXiv id:** [2402.03620v1](http://arxiv.org/abs/2402.03620v1) **Published Date:** 2024-02-06
- **Title:** Self-Discover: Large Language Models Self-Compose Reasoning Structures
- **Authors:** Pei Zhou, Jay Pujara, Xiang Ren, et al.
- **arXiv id:** [2402.03620v1](http://arxiv.org/abs/2402.03620v1) **Published Date:** 2024-02-06
- **LangChain:**
- **Cookbook:** [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
@@ -69,11 +107,33 @@ the self-discovered reasoning structures are universally applicable across
model families: from PaLM 2-L to GPT-4, and from GPT-4 to Llama2, and share
commonalities with human reasoning patterns.
## RAG-Fusion: a New Take on Retrieval-Augmented Generation
- **Authors:** Zackary Rackauckas
- **arXiv id:** [2402.03367v2](http://arxiv.org/abs/2402.03367v2) **Published Date:** 2024-01-31
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Infineon has identified a need for engineers, account managers, and customers
to rapidly obtain product information. This problem is traditionally addressed
with retrieval-augmented generation (RAG) chatbots, but in this study, I
evaluated the use of the newly popularized RAG-Fusion method. RAG-Fusion
combines RAG and reciprocal rank fusion (RRF) by generating multiple queries,
reranking them with reciprocal scores and fusing the documents and scores.
Through manually evaluating answers on accuracy, relevance, and
comprehensiveness, I found that RAG-Fusion was able to provide accurate and
comprehensive answers due to the generated queries contextualizing the original
query from various perspectives. However, some answers strayed off topic when
the generated queries' relevance to the original query was insufficient. This
research marks significant progress in artificial intelligence (AI) and natural
language processing (NLP) applications and demonstrates transformations in a
global and multi-industry context.
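For reference, the reciprocal rank fusion step described above reduces to a few lines of Python; the document ids and the `k` constant here are illustrative.

```python
# Fuse several ranked result lists by summing 1 / (rank + k) per document.
def reciprocal_rank_fusion(ranked_lists, k=60):
    scores = {}
    for docs in ranked_lists:
        for rank, doc_id in enumerate(docs):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (rank + k)
    # Highest fused score first
    return sorted(scores, key=scores.get, reverse=True)

fused = reciprocal_rank_fusion([
    ["doc_a", "doc_b", "doc_c"],  # results for generated query 1
    ["doc_b", "doc_d", "doc_a"],  # results for generated query 2
])
```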
## RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval
- **arXiv id:** [2401.18059v1](http://arxiv.org/abs/2401.18059v1) **Published Date:** 2024-01-31
- **Title:** RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval
- **Authors:** Parth Sarthi, Salman Abdullah, Aditi Tuli, et al.
- **arXiv id:** [2401.18059v1](http://arxiv.org/abs/2401.18059v1) **Published Date:** 2024-01-31
- **LangChain:**
- **Cookbook:** [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
@@ -95,11 +155,11 @@ benchmark by 20% in absolute accuracy.
## Corrective Retrieval Augmented Generation
- **arXiv id:** [2401.15884v2](http://arxiv.org/abs/2401.15884v2) **Published Date:** 2024-01-29
- **Title:** Corrective Retrieval Augmented Generation
- **Authors:** Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al.
- **arXiv id:** [2401.15884v2](http://arxiv.org/abs/2401.15884v2) **Published Date:** 2024-01-29
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **Cookbook:** [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
**Abstract:** Large language models (LLMs) inevitably exhibit hallucinations since the
@@ -121,11 +181,36 @@ RAG-based approaches. Experiments on four datasets covering short- and
long-form generation tasks show that CRAG can significantly improve the
performance of RAG-based approaches.
## Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering
- **Authors:** Tal Ridnik, Dedy Kredo, Itamar Friedman
- **arXiv id:** [2401.08500v1](http://arxiv.org/abs/2401.08500v1) **Published Date:** 2024-01-16
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Code generation problems differ from common natural language problems - they
require matching the exact syntax of the target language, identifying happy
paths and edge cases, paying attention to numerous small details in the problem
spec, and addressing other code-specific issues and requirements. Hence, many
of the optimizations and tricks that have been successful in natural language
generation may not be effective for code tasks. In this work, we propose a new
approach to code generation by LLMs, which we call AlphaCodium - a test-based,
multi-stage, code-oriented iterative flow, that improves the performances of
LLMs on code problems. We tested AlphaCodium on a challenging code generation
dataset called CodeContests, which includes competitive programming problems
from platforms such as Codeforces. The proposed flow consistently and
significantly improves results. On the validation set, for example, GPT-4
accuracy (pass@5) increased from 19% with a single well-designed direct prompt
to 44% with the AlphaCodium flow. Many of the principles and best practices
acquired in this work, we believe, are broadly applicable to general code
generation tasks. Full implementation is available at:
https://github.com/Codium-ai/AlphaCodium
## Mixtral of Experts
- **arXiv id:** [2401.04088v1](http://arxiv.org/abs/2401.04088v1) **Published Date:** 2024-01-08
- **Title:** Mixtral of Experts
- **Authors:** Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al.
- **arXiv id:** [2401.04088v1](http://arxiv.org/abs/2401.04088v1) **Published Date:** 2024-01-08
- **LangChain:**
- **Cookbook:** [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
@@ -147,9 +232,8 @@ the base and instruct models are released under the Apache 2.0 license.
## Dense X Retrieval: What Retrieval Granularity Should We Use?
- **arXiv id:** [2312.06648v2](http://arxiv.org/abs/2312.06648v2) **Published Date:** 2023-12-11
- **Title:** Dense X Retrieval: What Retrieval Granularity Should We Use?
- **Authors:** Tong Chen, Hongwei Wang, Sihao Chen, et al.
- **arXiv id:** [2312.06648v2](http://arxiv.org/abs/2312.06648v2) **Published Date:** 2023-12-11
- **LangChain:**
- **Template:** [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
@@ -174,9 +258,8 @@ information.
## Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models
- **arXiv id:** [2311.09210v1](http://arxiv.org/abs/2311.09210v1) **Published Date:** 2023-11-15
- **Title:** Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models
- **Authors:** Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al.
- **arXiv id:** [2311.09210v1](http://arxiv.org/abs/2311.09210v1) **Published Date:** 2023-11-15
- **LangChain:**
- **Template:** [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
@@ -206,11 +289,11 @@ outside the pre-training knowledge scope.
## Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection
- **arXiv id:** [2310.11511v1](http://arxiv.org/abs/2310.11511v1) **Published Date:** 2023-10-17
- **Title:** Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection
- **Authors:** Akari Asai, Zeqiu Wu, Yizhong Wang, et al.
- **arXiv id:** [2310.11511v1](http://arxiv.org/abs/2310.11511v1) **Published Date:** 2023-10-17
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **Cookbook:** [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
**Abstract:** Despite their remarkable capabilities, large language models (LLMs) often
@@ -237,11 +320,11 @@ to these models.
## Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models
- **arXiv id:** [2310.06117v2](http://arxiv.org/abs/2310.06117v2) **Published Date:** 2023-10-09
- **Title:** Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models
- **Authors:** Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al.
- **arXiv id:** [2310.06117v2](http://arxiv.org/abs/2310.06117v2) **Published Date:** 2023-10-09
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **Template:** [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting)
- **Cookbook:** [stepback-qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
@@ -256,11 +339,31 @@ including STEM, Knowledge QA, and Multi-Hop Reasoning. For instance, Step-Back
Prompting improves PaLM-2L performance on MMLU (Physics and Chemistry) by 7%
and 11% respectively, TimeQA by 27%, and MuSiQue by 7%.
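A rough LCEL sketch of the step-back pattern (loosely following the linked template, not a verbatim copy), assuming `llm` and `retriever` already exist:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Generate the more abstract, "step-back" version of the user question.
step_back = (
    ChatPromptTemplate.from_template(
        "Rewrite this question as a more generic, step-back question: {question}"
    )
    | llm
    | StrOutputParser()
)

# Answer using context retrieved for both the original and the step-back question.
answer = (
    {
        "question": lambda x: x["question"],
        "normal_context": lambda x: retriever.invoke(x["question"]),
        "step_back_context": lambda x: retriever.invoke(step_back.invoke(x)),
    }
    | ChatPromptTemplate.from_template(
        "Answer using both contexts.\n{normal_context}\n{step_back_context}\n"
        "Question: {question}"
    )
    | llm
    | StrOutputParser()
)
```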
## Skeleton-of-Thought: Prompting LLMs for Efficient Parallel Generation
- **Authors:** Xuefei Ning, Zinan Lin, Zixuan Zhou, et al.
- **arXiv id:** [2307.15337v3](http://arxiv.org/abs/2307.15337v3) **Published Date:** 2023-07-28
- **LangChain:**
- **Template:** [skeleton-of-thought](https://python.langchain.com/docs/templates/skeleton-of-thought)
**Abstract:** This work aims at decreasing the end-to-end generation latency of large
language models (LLMs). One of the major causes of the high generation latency
is the sequential decoding approach adopted by almost all state-of-the-art
LLMs. In this work, motivated by the thinking and writing process of humans, we
propose Skeleton-of-Thought (SoT), which first guides LLMs to generate the
skeleton of the answer, and then conducts parallel API calls or batched
decoding to complete the contents of each skeleton point in parallel. Not only
does SoT provide considerable speed-ups across 12 LLMs, but it can also
potentially improve the answer quality on several question categories. SoT is
an initial attempt at data-centric optimization for inference efficiency, and
showcases the potential of eliciting high-quality answers by explicitly
planning the answer structure in language.
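A simplified sketch of the skeleton-then-expand idea using LCEL batching rather than the authors' implementation; `llm` is assumed to be any chat model:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Stage 1: produce a short numbered skeleton of the answer.
skeleton_chain = (
    ChatPromptTemplate.from_template(
        "Write a short numbered skeleton (3-5 points, a few words each) "
        "for answering: {question}"
    )
    | llm
    | StrOutputParser()
)

# Stage 2: expand each skeleton point independently.
expand_chain = (
    ChatPromptTemplate.from_template(
        "Question: {question}\nSkeleton point: {point}\n"
        "Expand this point in 1-2 sentences."
    )
    | llm
    | StrOutputParser()
)

question = "Why is the sky blue?"
points = [p for p in skeleton_chain.invoke({"question": question}).splitlines() if p.strip()]
# Points are expanded in parallel via .batch(), mirroring SoT's parallel decoding.
expansions = expand_chain.batch([{"question": question, "point": p} for p in points])
answer = "\n".join(expansions)
```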
## Llama 2: Open Foundation and Fine-Tuned Chat Models
- **arXiv id:** [2307.09288v2](http://arxiv.org/abs/2307.09288v2) **Published Date:** 2023-07-18
- **Title:** Llama 2: Open Foundation and Fine-Tuned Chat Models
- **Authors:** Hugo Touvron, Louis Martin, Kevin Stone, et al.
- **arXiv id:** [2307.09288v2](http://arxiv.org/abs/2307.09288v2) **Published Date:** 2023-07-18
- **LangChain:**
- **Cookbook:** [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
@@ -275,11 +378,32 @@ detailed description of our approach to fine-tuning and safety improvements of
Llama 2-Chat in order to enable the community to build on our work and
contribute to the responsible development of LLMs.
## Lost in the Middle: How Language Models Use Long Contexts
- **Authors:** Nelson F. Liu, Kevin Lin, John Hewitt, et al.
- **arXiv id:** [2307.03172v3](http://arxiv.org/abs/2307.03172v3) **Published Date:** 2023-07-06
- **LangChain:**
- **Documentation:** [docs/how_to/long_context_reorder](https://python.langchain.com/v0.2/docs/how_to/long_context_reorder)
**Abstract:** While recent language models have the ability to take long contexts as input,
relatively little is known about how well they use longer context. We analyze
the performance of language models on two tasks that require identifying
relevant information in their input contexts: multi-document question answering
and key-value retrieval. We find that performance can degrade significantly
when changing the position of relevant information, indicating that current
language models do not robustly make use of information in long input contexts.
In particular, we observe that performance is often highest when relevant
information occurs at the beginning or end of the input context, and
significantly degrades when models must access relevant information in the
middle of long contexts, even for explicitly long-context models. Our analysis
provides a better understanding of how language models use their input context
and provides new evaluation protocols for future long-context language models.
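The linked how-to guide addresses this finding with `LongContextReorder`; a minimal example, where `docs` is assumed to be a relevance-ordered list of retrieved `Document`s:

```python
from langchain_community.document_transformers import LongContextReorder

# Move the most relevant documents to the start and end of the prompt context,
# so the least relevant ones end up in the middle.
reordering = LongContextReorder()
reordered_docs = reordering.transform_documents(docs)
```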
## Query Rewriting for Retrieval-Augmented Large Language Models
- **arXiv id:** [2305.14283v3](http://arxiv.org/abs/2305.14283v3) **Published Date:** 2023-05-23
- **Title:** Query Rewriting for Retrieval-Augmented Large Language Models
- **Authors:** Xinbei Ma, Yeyun Gong, Pengcheng He, et al.
- **arXiv id:** [2305.14283v3](http://arxiv.org/abs/2305.14283v3) **Published Date:** 2023-05-23
- **LangChain:**
- **Template:** [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read)
@@ -305,12 +429,11 @@ for retrieval-augmented LLM.
## Large Language Model Guided Tree-of-Thought
- **arXiv id:** [2305.08291v1](http://arxiv.org/abs/2305.08291v1) **Published Date:** 2023-05-15
- **Title:** Large Language Model Guided Tree-of-Thought
- **Authors:** Jieyi Long
- **arXiv id:** [2305.08291v1](http://arxiv.org/abs/2305.08291v1) **Published Date:** 2023-05-15
- **LangChain:**
- **API Reference:** [langchain_experimental.tot](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.tot)
- **API Reference:** [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot)
- **Cookbook:** [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
**Abstract:** In this paper, we introduce the Tree-of-Thought (ToT) framework, a novel
@@ -333,9 +456,8 @@ implementation of the ToT-based Sudoku solver is available on GitHub:
## Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models
- **arXiv id:** [2305.04091v3](http://arxiv.org/abs/2305.04091v3) **Published Date:** 2023-05-06
- **Title:** Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models
- **Authors:** Lei Wang, Wanyu Xu, Yihuai Lan, et al.
- **arXiv id:** [2305.04091v3](http://arxiv.org/abs/2305.04091v3) **Published Date:** 2023-05-06
- **LangChain:**
- **Cookbook:** [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
@@ -364,12 +486,12 @@ https://github.com/AGI-Edgerunners/Plan-and-Solve-Prompting.
## Zero-Shot Listwise Document Reranking with a Large Language Model
- **arXiv id:** [2305.02156v1](http://arxiv.org/abs/2305.02156v1) **Published Date:** 2023-05-03
- **Title:** Zero-Shot Listwise Document Reranking with a Large Language Model
- **Authors:** Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al.
- **arXiv id:** [2305.02156v1](http://arxiv.org/abs/2305.02156v1) **Published Date:** 2023-05-03
- **LangChain:**
- **API Reference:** [langchain...LLMListwiseRerank](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
- **Documentation:** [docs/how_to/contextual_compression](https://python.langchain.com/v0.2/docs/how_to/contextual_compression)
- **API Reference:** [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
**Abstract:** Supervised ranking methods based on bi-encoder or cross-encoder architectures
have shown success in multi-stage text ranking tasks, but they require large
@@ -388,12 +510,11 @@ with results showing its potential to generalize across different languages.
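A short sketch of listwise reranking through the linked `LLMListwiseRerank` compressor; `base_retriever` is assumed to exist, and the model name is illustrative:

```python
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMListwiseRerank
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Zero-shot listwise reranker that keeps the top 3 documents.
reranker = LLMListwiseRerank.from_llm(llm, top_n=3)
retriever = ContextualCompressionRetriever(
    base_compressor=reranker, base_retriever=base_retriever
)
docs = retriever.invoke("What did the president say about the economy?")
```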
## Visual Instruction Tuning
- **arXiv id:** [2304.08485v2](http://arxiv.org/abs/2304.08485v2) **Published Date:** 2023-04-17
- **Title:** Visual Instruction Tuning
- **Authors:** Haotian Liu, Chunyuan Li, Qingyang Wu, et al.
- **arXiv id:** [2304.08485v2](http://arxiv.org/abs/2304.08485v2) **Published Date:** 2023-04-17
- **LangChain:**
- **Cookbook:** [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb), [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb)
- **Cookbook:** [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb), [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb)
**Abstract:** Instruction tuning large language models (LLMs) using machine-generated
instruction-following data has improved zero-shot capabilities on new tasks,
@@ -413,12 +534,11 @@ publicly available.
## Generative Agents: Interactive Simulacra of Human Behavior
- **arXiv id:** [2304.03442v2](http://arxiv.org/abs/2304.03442v2) **Published Date:** 2023-04-07
- **Title:** Generative Agents: Interactive Simulacra of Human Behavior
- **Authors:** Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al.
- **arXiv id:** [2304.03442v2](http://arxiv.org/abs/2304.03442v2) **Published Date:** 2023-04-07
- **LangChain:**
- **Cookbook:** [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb), [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb)
- **Cookbook:** [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb), [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb)
**Abstract:** Believable proxies of human behavior can empower interactive applications
ranging from immersive environments to rehearsal spaces for interpersonal
@@ -447,9 +567,8 @@ interaction patterns for enabling believable simulations of human behavior.
## CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society
- **arXiv id:** [2303.17760v2](http://arxiv.org/abs/2303.17760v2) **Published Date:** 2023-03-31
- **Title:** CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society
- **Authors:** Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al.
- **arXiv id:** [2303.17760v2](http://arxiv.org/abs/2303.17760v2) **Published Date:** 2023-03-31
- **LangChain:**
- **Cookbook:** [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
@@ -475,12 +594,11 @@ agents and beyond: https://github.com/camel-ai/camel.
## HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face
- **arXiv id:** [2303.17580v4](http://arxiv.org/abs/2303.17580v4) **Published Date:** 2023-03-30
- **Title:** HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face
- **Authors:** Yongliang Shen, Kaitao Song, Xu Tan, et al.
- **arXiv id:** [2303.17580v4](http://arxiv.org/abs/2303.17580v4) **Published Date:** 2023-03-30
- **LangChain:**
- **API Reference:** [langchain_experimental.autonomous_agents](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.autonomous_agents)
- **API Reference:** [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents)
- **Cookbook:** [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
**Abstract:** Solving complicated AI tasks with different domains and modalities is a key
@@ -505,12 +623,11 @@ realization of artificial general intelligence.
## A Watermark for Large Language Models
- **arXiv id:** [2301.10226v4](http://arxiv.org/abs/2301.10226v4) **Published Date:** 2023-01-24
- **Title:** A Watermark for Large Language Models
- **Authors:** John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al.
- **arXiv id:** [2301.10226v4](http://arxiv.org/abs/2301.10226v4) **Published Date:** 2023-01-24
- **LangChain:**
- **API Reference:** [langchain_community...OCIModelDeploymentTGI](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/langchain_community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
- **API Reference:** [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
**Abstract:** Potential harms of large language models can be mitigated by watermarking
model output, i.e., embedding signals into generated text that are invisible to
@@ -528,12 +645,12 @@ family, and discuss robustness and security.
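The linked Hugging Face endpoint wrappers expose this watermark as a boolean generation flag (via text-generation-inference); a small, assumption-laden example where the `repo_id` is illustrative:

```python
from langchain_huggingface import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    max_new_tokens=256,
    watermark=True,  # embed the detectable watermark signal in generated text
)
print(llm.invoke("Write two sentences about oceans."))
```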
## Precise Zero-Shot Dense Retrieval without Relevance Labels
- **arXiv id:** [2212.10496v1](http://arxiv.org/abs/2212.10496v1) **Published Date:** 2022-12-20
- **Title:** Precise Zero-Shot Dense Retrieval without Relevance Labels
- **Authors:** Luyu Gao, Xueguang Ma, Jimmy Lin, et al.
- **arXiv id:** [2212.10496v1](http://arxiv.org/abs/2212.10496v1) **Published Date:** 2022-12-20
- **LangChain:**
- **API Reference:** [langchain...HypotheticalDocumentEmbedder](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder)
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **API Reference:** [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder)
- **Template:** [hyde](https://python.langchain.com/docs/templates/hyde)
- **Cookbook:** [hypothetical_document_embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
@@ -555,14 +672,40 @@ state-of-the-art unsupervised dense retriever Contriever and shows strong
performance comparable to fine-tuned retrievers, across various tasks (e.g. web
search, QA, fact verification) and languages (e.g. sw, ko, ja).
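A compact sketch of HyDE via the linked `HypotheticalDocumentEmbedder`, with assumed OpenAI models:

```python
from langchain.chains import HypotheticalDocumentEmbedder
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# The LLM writes a hypothetical answer document, which is embedded in place of the raw query.
embeddings = HypotheticalDocumentEmbedder.from_llm(
    ChatOpenAI(temperature=0),
    OpenAIEmbeddings(),
    prompt_key="web_search",  # one of the built-in HyDE prompt styles
)
vector = embeddings.embed_query("Where is the Taj Mahal?")
```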
## Constitutional AI: Harmlessness from AI Feedback
- **Authors:** Yuntao Bai, Saurav Kadavath, Sandipan Kundu, et al.
- **arXiv id:** [2212.08073v1](http://arxiv.org/abs/2212.08073v1) **Published Date:** 2022-12-15
- **LangChain:**
- **Documentation:** [docs/versions/migrating_chains/constitutional_chain](https://python.langchain.com/v0.2/docs/versions/migrating_chains/constitutional_chain)
**Abstract:** As AI systems become more capable, we would like to enlist their help to
supervise other AIs. We experiment with methods for training a harmless AI
assistant through self-improvement, without any human labels identifying
harmful outputs. The only human oversight is provided through a list of rules
or principles, and so we refer to the method as 'Constitutional AI'. The
process involves both a supervised learning and a reinforcement learning phase.
In the supervised phase we sample from an initial model, then generate
self-critiques and revisions, and then finetune the original model on revised
responses. In the RL phase, we sample from the finetuned model, use a model to
evaluate which of the two samples is better, and then train a preference model
from this dataset of AI preferences. We then train with RL using the preference
model as the reward signal, i.e. we use 'RL from AI Feedback' (RLAIF). As a
result we are able to train a harmless but non-evasive AI assistant that
engages with harmful queries by explaining its objections to them. Both the SL
and RL methods can leverage chain-of-thought style reasoning to improve the
human-judged performance and transparency of AI decision making. These methods
make it possible to control AI behavior more precisely and with far fewer human
labels.
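The critique-and-revise loop is available through the (legacy) `ConstitutionalChain` covered by the migration guide above; a brief sketch with an assumed chat model and a made-up principle:

```python
from langchain.chains import ConstitutionalChain, LLMChain
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0)
qa_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template("Q: {question}\nA:"))

# Wrap the base chain so its answers are critiqued and revised against a principle.
constitutional_chain = ConstitutionalChain.from_llm(
    chain=qa_chain,
    constitutional_principles=[
        ConstitutionalPrinciple(
            critique_request="Point out anything harmful or unethical in the answer.",
            revision_request="Rewrite the answer to be harmless but still helpful.",
        )
    ],
    llm=llm,
)
result = constitutional_chain.invoke({"question": "How do I pick a strong password?"})
```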
## Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments
- **Authors:** Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al.
- **arXiv id:** [2212.07425v3](http://arxiv.org/abs/2212.07425v3) **Published Date:** 2022-12-12
- **LangChain:**
- **API Reference:** [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
**Abstract:** The spread of misinformation, propaganda, and flawed argumentation has been
amplified in the Internet era. Given the volume of data and the subtlety of
@@ -588,12 +731,11 @@ further work on logical fallacy identification.
## Complementary Explanations for Effective In-Context Learning
- **arXiv id:** [2211.13892v2](http://arxiv.org/abs/2211.13892v2) **Published Date:** 2022-11-25
- **Title:** Complementary Explanations for Effective In-Context Learning
- **Authors:** Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al.
- **arXiv id:** [2211.13892v2](http://arxiv.org/abs/2211.13892v2) **Published Date:** 2022-11-25
- **LangChain:**
- **API Reference:** [langchain_core...MaxMarginalRelevanceExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
- **API Reference:** [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
**Abstract:** Large language models (LLMs) have exhibited remarkable capabilities in
learning from explanations in prompts, but there has been limited understanding
@@ -614,12 +756,11 @@ performance across three real-world tasks on multiple LLMs.
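A small sketch of selecting few-shot exemplars by maximal marginal relevance with the linked selector; the toy examples and the embedding/vector-store choices are assumptions:

```python
from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import MaxMarginalRelevanceExampleSelector
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_openai import OpenAIEmbeddings

examples = [
    {"input": "happy", "output": "sad"},
    {"input": "tall", "output": "short"},
    {"input": "sunny", "output": "gloomy"},
]

# Pick exemplars that are both relevant to the query and diverse from each other.
selector = MaxMarginalRelevanceExampleSelector.from_examples(
    examples, OpenAIEmbeddings(), FAISS, k=2
)

prompt = FewShotPromptTemplate(
    example_selector=selector,
    example_prompt=PromptTemplate.from_template("Input: {input}\nOutput: {output}"),
    suffix="Input: {adjective}\nOutput:",
    input_variables=["adjective"],
)
print(prompt.format(adjective="cheerful"))
```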
## PAL: Program-aided Language Models
- **arXiv id:** [2211.10435v2](http://arxiv.org/abs/2211.10435v2) **Published Date:** 2022-11-18
- **Title:** PAL: Program-aided Language Models
- **Authors:** Luyu Gao, Aman Madaan, Shuyan Zhou, et al.
- **arXiv id:** [2211.10435v2](http://arxiv.org/abs/2211.10435v2) **Published Date:** 2022-11-18
- **LangChain:**
- **API Reference:** [langchain_experimental.pal_chain](https://python.langchain.com/v0.2/api_reference//python/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://python.langchain.com/v0.2/api_reference/experimental/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
- **API Reference:** [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
- **Cookbook:** [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
**Abstract:** Large language models (LLMs) have recently demonstrated an impressive ability
@@ -645,15 +786,33 @@ accuracy on the GSM8K benchmark of math word problems, surpassing PaLM-540B
which uses chain-of-thought by absolute 15% top-1. Our code and data are
publicly available at http://reasonwithpal.com/ .
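A minimal sketch of the linked experimental `PALChain`; note that recent releases appear to require explicitly opting in to code execution:

```python
from langchain_experimental.pal_chain import PALChain
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0)
# The LLM writes a small Python program; the chain executes it to get the answer.
pal_chain = PALChain.from_math_prompt(llm, allow_dangerous_code=True)
question = (
    "Olivia has $23. She bought five bagels for $3 each. How much money does she have left?"
)
print(pal_chain.invoke({"question": question}))
```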
## An Analysis of Fusion Functions for Hybrid Retrieval
- **Authors:** Sebastian Bruch, Siyu Gai, Amir Ingber
- **arXiv id:** [2210.11934v2](http://arxiv.org/abs/2210.11934v2) **Published Date:** 2022-10-21
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** We study hybrid search in text retrieval where lexical and semantic search
are fused together with the intuition that the two are complementary in how
they model relevance. In particular, we examine fusion by a convex combination
(CC) of lexical and semantic scores, as well as the Reciprocal Rank Fusion
(RRF) method, and identify their advantages and potential pitfalls. Contrary to
existing studies, we find RRF to be sensitive to its parameters; that the
learning of a CC fusion is generally agnostic to the choice of score
normalization; that CC outperforms RRF in in-domain and out-of-domain settings;
and finally, that CC is sample efficient, requiring only a small set of
training examples to tune its only parameter to a target domain.
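LangChain's `EnsembleRetriever` performs this style of hybrid fusion (it uses weighted reciprocal rank fusion rather than a learned convex combination); a small sketch with illustrative texts:

```python
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

texts = ["apples are red", "bananas are yellow", "the sky is blue"]
bm25 = BM25Retriever.from_texts(texts)                      # lexical retriever
vector = FAISS.from_texts(texts, OpenAIEmbeddings()).as_retriever()  # semantic retriever

# Fuse lexical and semantic rankings with reciprocal rank fusion.
hybrid = EnsembleRetriever(retrievers=[bm25, vector], weights=[0.5, 0.5])
docs = hybrid.invoke("what color are apples?")
```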
## ReAct: Synergizing Reasoning and Acting in Language Models
- **Authors:** Shunyu Yao, Jeffrey Zhao, Dian Yu, et al.
- **arXiv id:** [2210.03629v3](http://arxiv.org/abs/2210.03629v3) **Published Date:** 2022-10-06
- **LangChain:**
- **Documentation:** [docs/integrations/tools/ionic_shopping](https://python.langchain.com/v0.2/docs/integrations/tools/ionic_shopping), [docs/integrations/providers/cohere](https://python.langchain.com/v0.2/docs/integrations/providers/cohere), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **API Reference:** [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
**Abstract:** While large language models (LLMs) have demonstrated impressive capabilities
across tasks in language understanding and interactive decision making, their
@@ -680,12 +839,11 @@ Project site with code: https://react-lm.github.io
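A compact sketch of a ReAct agent built with the linked `create_react_agent`; the search tool and hub prompt are conventional choices, not requirements:

```python
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_openai import ChatOpenAI

tools = [DuckDuckGoSearchRun()]
prompt = hub.pull("hwchase17/react")  # the standard ReAct prompt from LangChain Hub
agent = create_react_agent(ChatOpenAI(temperature=0), tools, prompt)
executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
executor.invoke({"input": "Who won the most recent FIFA World Cup?"})
```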
## Deep Lake: a Lakehouse for Deep Learning
- **arXiv id:** [2209.10785v2](http://arxiv.org/abs/2209.10785v2) **Published Date:** 2022-09-22
- **Title:** Deep Lake: a Lakehouse for Deep Learning
- **Authors:** Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al.
- **arXiv id:** [2209.10785v2](http://arxiv.org/abs/2209.10785v2) **Published Date:** 2022-09-22
- **LangChain:**
- **Documentation:** [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
- **Documentation:** [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/v0.2/docs/integrations/providers/activeloop_deeplake)
**Abstract:** Traditional data lakes provide critical data infrastructure for analytical
workloads by enabling time travel, running SQL queries, ingesting data with
@@ -706,12 +864,11 @@ TensorFlow, JAX, and integrate with numerous MLOps tools.
## Matryoshka Representation Learning
- **arXiv id:** [2205.13147v4](http://arxiv.org/abs/2205.13147v4) **Published Date:** 2022-05-26
- **Title:** Matryoshka Representation Learning
- **Authors:** Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al.
- **arXiv id:** [2205.13147v4](http://arxiv.org/abs/2205.13147v4) **Published Date:** 2022-05-26
- **LangChain:**
- **Documentation:** [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
- **Documentation:** [docs/integrations/providers/snowflake](https://python.langchain.com/v0.2/docs/integrations/providers/snowflake)
**Abstract:** Learned representations are a central component in modern ML systems, serving
a multitude of downstream tasks. When training such representations, it is
@@ -738,12 +895,11 @@ are open-sourced at https://github.com/RAIVNLab/MRL.
## Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
- **arXiv id:** [2205.12654v1](http://arxiv.org/abs/2205.12654v1) **Published Date:** 2022-05-25
- **Title:** Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
- **Authors:** Kevin Heffernan, Onur Çelebi, Holger Schwenk
- **arXiv id:** [2205.12654v1](http://arxiv.org/abs/2205.12654v1) **Published Date:** 2022-05-25
- **LangChain:**
- **API Reference:** [langchain_community...LaserEmbeddings](https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
- **API Reference:** [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
**Abstract:** Scaling multilingual representation learning beyond the hundred most frequent
languages is challenging, in particular to cover the long tail of low-resource
@@ -765,12 +921,12 @@ encoders, mine bitexts, and validate the bitexts by training NMT systems.
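A short example of the linked `LaserEmbeddings` wrapper; the language-code argument and the `laser_encoders` dependency are assumptions based on the integration:

```python
from langchain_community.embeddings import LaserEmbeddings

# Multilingual LASER sentence embeddings (requires the `laser_encoders` package).
embedder = LaserEmbeddings(lang="eng_Latn")
query_vector = embedder.embed_query("Hello, world")
doc_vectors = embedder.embed_documents(["bonjour le monde", "hola mundo"])
```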
## Evaluating the Text-to-SQL Capabilities of Large Language Models
- **arXiv id:** [2204.00498v1](http://arxiv.org/abs/2204.00498v1) **Published Date:** 2022-03-15
- **Title:** Evaluating the Text-to-SQL Capabilities of Large Language Models
- **Authors:** Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau
- **arXiv id:** [2204.00498v1](http://arxiv.org/abs/2204.00498v1) **Published Date:** 2022-03-15
- **LangChain:**
- **API Reference:** [langchain_community...SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
- **Documentation:** [docs/tutorials/sql_qa](https://python.langchain.com/v0.2/docs/tutorials/sql_qa)
- **API Reference:** [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
**Abstract:** We perform an empirical evaluation of Text-to-SQL capabilities of the Codex
language model. We find that, without any finetuning, Codex is a strong
@@ -782,12 +938,11 @@ few-shot examples.
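A brief sketch pairing the linked `SQLDatabase` utility with a query-generation chain; the SQLite file name is illustrative:

```python
from langchain.chains import create_sql_query_chain
from langchain_community.utilities import SQLDatabase
from langchain_openai import ChatOpenAI

db = SQLDatabase.from_uri("sqlite:///Chinook.db")
# Generate SQL from a natural-language question, then execute it against the database.
write_query = create_sql_query_chain(ChatOpenAI(temperature=0), db)
sql = write_query.invoke({"question": "How many employees are there?"})
print(sql)
print(db.run(sql))
```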
## Locally Typical Sampling
- **arXiv id:** [2202.00666v5](http://arxiv.org/abs/2202.00666v5) **Published Date:** 2022-02-01
- **Title:** Locally Typical Sampling
- **Authors:** Clara Meister, Tiago Pimentel, Gian Wiher, et al.
- **arXiv id:** [2202.00666v5](http://arxiv.org/abs/2202.00666v5) **Published Date:** 2022-02-01
- **LangChain:**
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
**Abstract:** Today's probabilistic language generators fall short when it comes to
producing coherent and fluent text despite the fact that the underlying models
@@ -810,14 +965,35 @@ locally typical sampling offers competitive performance (in both abstractive
summarization and story generation) in terms of quality while consistently
reducing degenerate repetitions.
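Locally typical sampling surfaces in the linked endpoint wrappers as the `typical_p` decoding parameter; a minimal example with an illustrative `repo_id`:

```python
from langchain_huggingface import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    max_new_tokens=200,
    do_sample=True,
    typical_p=0.95,  # keep tokens whose information content is close to the expected value
)
print(llm.invoke("Tell me a short story about a lighthouse."))
```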
## ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction
- **Authors:** Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al.
- **arXiv id:** [2112.01488v3](http://arxiv.org/abs/2112.01488v3) **Published Date:** 2021-12-02
- **LangChain:**
- **Documentation:** [docs/integrations/retrievers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/v0.2/docs/integrations/providers/dspy)
**Abstract:** Neural information retrieval (IR) has greatly advanced search and other
knowledge-intensive language tasks. While many neural IR methods encode queries
and documents into single-vector representations, late interaction models
produce multi-vector representations at the granularity of each token and
decompose relevance modeling into scalable token-level computations. This
decomposition has been shown to make late interaction more effective, but it
inflates the space footprint of these models by an order of magnitude. In this
work, we introduce ColBERTv2, a retriever that couples an aggressive residual
compression mechanism with a denoised supervision strategy to simultaneously
improve the quality and space footprint of late interaction. We evaluate
ColBERTv2 across a wide range of benchmarks, establishing state-of-the-art
quality within and outside the training domain while reducing the space
footprint of late interaction models by 6–10×.
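ColBERTv2 is typically used from LangChain through the RAGatouille integration documented above; a short sketch in which the indexed text and index name are made up:

```python
from ragatouille import RAGPretrainedModel

# Load the pretrained ColBERTv2 checkpoint and build a small local index.
RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
RAG.index(
    collection=["Hayao Miyazaki co-founded Studio Ghibli in 1985."],
    index_name="demo-index",
)
retriever = RAG.as_langchain_retriever(k=3)
docs = retriever.invoke("Who founded Studio Ghibli?")
```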
## Learning Transferable Visual Models From Natural Language Supervision
- **Authors:** Alec Radford, Jong Wook Kim, Chris Hallacy, et al.
- **arXiv id:** [2103.00020v1](http://arxiv.org/abs/2103.00020v1) **Published Date:** 2021-02-26
- **LangChain:**
- **API Reference:** [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
**Abstract:** State-of-the-art computer vision systems are trained to predict a fixed set
of predetermined object categories. This restricted form of supervision limits
@@ -840,14 +1016,77 @@ zero-shot without needing to use any of the 1.28 million training examples it
was trained on. We release our code and pre-trained model weights at
https://github.com/OpenAI/CLIP.
## Language Models are Few-Shot Learners
- **Authors:** Tom B. Brown, Benjamin Mann, Nick Ryder, et al.
- **arXiv id:** [2005.14165v4](http://arxiv.org/abs/2005.14165v4) **Published Date:** 2020-05-28
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Recent work has demonstrated substantial gains on many NLP tasks and
benchmarks by pre-training on a large corpus of text followed by fine-tuning on
a specific task. While typically task-agnostic in architecture, this method
still requires task-specific fine-tuning datasets of thousands or tens of
thousands of examples. By contrast, humans can generally perform a new language
task from only a few examples or from simple instructions - something which
current NLP systems still largely struggle to do. Here we show that scaling up
language models greatly improves task-agnostic, few-shot performance, sometimes
even reaching competitiveness with prior state-of-the-art fine-tuning
approaches. Specifically, we train GPT-3, an autoregressive language model with
175 billion parameters, 10x more than any previous non-sparse language model,
and test its performance in the few-shot setting. For all tasks, GPT-3 is
applied without any gradient updates or fine-tuning, with tasks and few-shot
demonstrations specified purely via text interaction with the model. GPT-3
achieves strong performance on many NLP datasets, including translation,
question-answering, and cloze tasks, as well as several tasks that require
on-the-fly reasoning or domain adaptation, such as unscrambling words, using a
novel word in a sentence, or performing 3-digit arithmetic. At the same time,
we also identify some datasets where GPT-3's few-shot learning still struggles,
as well as some datasets where GPT-3 faces methodological issues related to
training on large web corpora. Finally, we find that GPT-3 can generate samples
of news articles which human evaluators have difficulty distinguishing from
articles written by humans. We discuss broader societal impacts of this finding
and of GPT-3 in general.
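Few-shot prompting of this kind is a first-class pattern in LangChain; a tiny, self-contained sketch with made-up exemplars:

```python
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

example_prompt = PromptTemplate.from_template("Q: {question}\nA: {answer}")
examples = [
    {"question": "2 + 2", "answer": "4"},
    {"question": "3 * 3", "answer": "9"},
]

# Demonstrations are supplied purely as text, with no gradient updates.
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    suffix="Q: {question}\nA:",
    input_variables=["question"],
)
print(prompt.format(question="7 - 5"))
```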
## Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
- **Authors:** Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al.
- **arXiv id:** [2005.11401v4](http://arxiv.org/abs/2005.11401v4) **Published Date:** 2020-05-22
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
**Abstract:** Large pre-trained language models have been shown to store factual knowledge
in their parameters, and achieve state-of-the-art results when fine-tuned on
downstream NLP tasks. However, their ability to access and precisely manipulate
knowledge is still limited, and hence on knowledge-intensive tasks, their
performance lags behind task-specific architectures. Additionally, providing
provenance for their decisions and updating their world knowledge remain open
research problems. Pre-trained models with a differentiable access mechanism to
explicit non-parametric memory can overcome this issue, but have so far been
only investigated for extractive downstream tasks. We explore a general-purpose
fine-tuning recipe for retrieval-augmented generation (RAG) -- models which
combine pre-trained parametric and non-parametric memory for language
generation. We introduce RAG models where the parametric memory is a
pre-trained seq2seq model and the non-parametric memory is a dense vector index
of Wikipedia, accessed with a pre-trained neural retriever. We compare two RAG
formulations, one which conditions on the same retrieved passages across the
whole generated sequence, the other can use different passages per token. We
fine-tune and evaluate our models on a wide range of knowledge-intensive NLP
tasks and set the state-of-the-art on three open domain QA tasks, outperforming
parametric seq2seq models and task-specific retrieve-and-extract architectures.
For language generation tasks, we find that RAG models generate more specific,
diverse and factual language than a state-of-the-art parametric-only seq2seq
baseline.
## CTRL: A Conditional Transformer Language Model for Controllable Generation
- **Authors:** Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al.
- **arXiv id:** [1909.05858v2](http://arxiv.org/abs/1909.05858v2) **Published Date:** 2019-09-11
- **LangChain:**
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
**Abstract:** Large-scale language models show promising text generation capabilities, but
users cannot easily control particular aspects of the generated text. We

View File

@@ -15,11 +15,6 @@ The interfaces for core components like LLMs, vector stores, retrievers and more
No third party integrations are defined here.
The dependencies are kept purposefully very lightweight.
### Partner packages
While the long tail of integrations are in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).
This was done in order to improve support for these important integrations.
### `langchain`
The main `langchain` package contains chains, agents, and retrieval strategies that make up an application's cognitive architecture.
@@ -33,6 +28,11 @@ Key partner packages are separated out (see below).
This contains all integrations for various components (LLMs, vector stores, retrievers).
All dependencies in this package are optional to keep the package as lightweight as possible.
### Partner packages
While the long tail of integrations is in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).
This was done in order to improve support for these important integrations.
### [`langgraph`](https://langchain-ai.github.io/langgraph)
`langgraph` is an extension of `langchain` aimed at
@@ -61,28 +61,28 @@ A developer platform that lets you debug, test, evaluate, and monitor LLM applic
## LangChain Expression Language (LCEL)
<span data-heading-keywords="lcel"></span>
LangChain Expression Language, or LCEL, is a declarative way to chain LangChain components.
`LangChain Expression Language`, or `LCEL`, is a declarative way to chain LangChain components.
LCEL was designed from day 1 to **support putting prototypes in production, with no code changes**, from the simplest “prompt + LLM” chain to the most complex chains (we've seen folks successfully run LCEL chains with 100s of steps in production). To highlight a few of the reasons you might want to use LCEL:
**First-class streaming support**
- **First-class streaming support:**
When you build your chains with LCEL you get the best possible time-to-first-token (time elapsed until the first chunk of output comes out). For some chains this means eg. we stream tokens straight from an LLM to a streaming output parser, and you get back parsed, incremental chunks of output at the same rate as the LLM provider outputs the raw tokens.
**Async support**
- **Async support:**
Any chain built with LCEL can be called both with the synchronous API (eg. in your Jupyter notebook while prototyping) as well as with the asynchronous API (eg. in a [LangServe](/docs/langserve/) server). This enables using the same code for prototypes and in production, with great performance, and the ability to handle many concurrent requests in the same server.
**Optimized parallel execution**
- **Optimized parallel execution:**
Whenever your LCEL chains have steps that can be executed in parallel (eg if you fetch documents from multiple retrievers) we automatically do it, both in the sync and the async interfaces, for the smallest possible latency.
**Retries and fallbacks**
- **Retries and fallbacks:**
Configure retries and fallbacks for any part of your LCEL chain. This is a great way to make your chains more reliable at scale. We're currently working on adding streaming support for retries/fallbacks, so you can get the added reliability without any latency cost.
**Access intermediate results**
- **Access intermediate results:**
For more complex chains it's often very useful to access the results of intermediate steps even before the final output is produced. This can be used to let end-users know something is happening, or even just to debug your chain. You can stream intermediate results, and it's available on every [LangServe](/docs/langserve) server.
**Input and output schemas**
- **Input and output schemas**
Input and output schemas give every LCEL chain Pydantic and JSONSchema schemas inferred from the structure of your chain. This can be used for validation of inputs and outputs, and is an integral part of LangServe.
[**Seamless LangSmith tracing**](https://docs.smith.langchain.com)
- [**Seamless LangSmith tracing**](https://docs.smith.langchain.com)
As your chains get more and more complex, it becomes increasingly important to understand what exactly is happening at every step.
With LCEL, **all** steps are automatically logged to [LangSmith](https://docs.smith.langchain.com/) for maximum observability and debuggability.
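As an illustrative sketch (assuming `langchain-openai` is installed, an OpenAI API key is configured, and with the model name being only an example), a simple LCEL chain composes a prompt, a model, and an output parser with the `|` operator:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Each component is a Runnable; `|` composes them into a single chain.
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
model = ChatOpenAI(model="gpt-4o-mini")  # example model name
chain = prompt | model | StrOutputParser()

# The same chain definition supports sync, async, streaming, and batch calls.
print(chain.invoke({"topic": "bears"}))
for chunk in chain.stream({"topic": "bears"}):
    print(chunk, end="", flush=True)
```

The streaming, async, fallback, and tracing features listed above all apply to this same composition without changes to the chain definition.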
@@ -186,7 +186,7 @@ For a full list of LangChain model providers with multimodal models, [check out
<span data-heading-keywords="llm,llms"></span>
:::caution
Pure text-in/text-out LLMs tend to be older or lower-level. Many popular models are best used as [chat completion models](/docs/concepts/#chat-models),
Pure text-in/text-out LLMs tend to be older or lower-level. Many new popular models are best used as [chat completion models](/docs/concepts/#chat-models),
even for non-chat use cases.
You are probably looking for [the section above instead](/docs/concepts/#chat-models).
@@ -201,7 +201,7 @@ When messages are passed in as input, they will be formatted into a string under
LangChain does not host any LLMs, rather we rely on third party integrations.
For specifics on how to use LLMs, see the [relevant how-to guides here](/docs/how_to/#llms).
For specifics on how to use LLMs, see the [how-to guides](/docs/how_to/#llms).
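As a brief sketch (the completion model name is only an example), a string-in/string-out LLM is invoked directly with text:

```python
from langchain_openai import OpenAI  # a text-completion (string-in/string-out) model

llm = OpenAI(model="gpt-3.5-turbo-instruct")  # example model name
# A plain string goes in; a plain string comes out.
print(llm.invoke("Write a one-line tagline for a coffee shop."))
```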
### Messages
@@ -215,7 +215,7 @@ LangChain has different message classes for different roles.
The `content` property describes the content of the message.
This can be a few different things:
- A string (most models deal this type of content)
- A string (most models deal with this type of content)
- A List of dictionaries (this is used for multimodal input, where the dictionary contains information about that input type and that input location)
Optionally, messages can have a `name` property which allows for differentiating between multiple speakers with the same role.
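To make the `content` variants concrete, here is a small sketch (the image URL is a placeholder):

```python
from langchain_core.messages import HumanMessage, SystemMessage

# String content, which most models accept.
simple = HumanMessage(content="Describe this image in one sentence.", name="alice")

# List-of-dictionaries content for multimodal input; the URL below is a placeholder.
multimodal = HumanMessage(
    content=[
        {"type": "text", "text": "Describe this image in one sentence."},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
    ]
)

messages = [SystemMessage(content="You are a helpful assistant."), multimodal]
```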
@@ -365,24 +365,18 @@ See documentation for that [here](/docs/concepts/#function-tool-calling).
:::
Responsible for taking the output of a model and transforming it to a more suitable format for downstream tasks.
`Output parser` is responsible for taking the output of a model and transforming it to a more suitable format for downstream tasks.
Useful when you are using LLMs to generate structured data, or to normalize output from chat models and LLMs.
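For example, a sketch of a parser that both supplies format instructions and validates the model's output (recent `langchain-core` versions accept Pydantic v2 models here; older ones may need the bundled `pydantic_v1` shim):

```python
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

class Person(BaseModel):
    name: str = Field(description="The person's name")
    age: int = Field(description="The person's age")

parser = PydanticOutputParser(pydantic_object=Person)

# Format instructions can be placed in the prompt so the model emits valid JSON.
print(parser.get_format_instructions())

# The parser turns raw model output into a validated object.
person = parser.parse('{"name": "Ada", "age": 36}')
print(person)
```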
LangChain has lots of different types of output parsers. This is a list of output parsers LangChain supports. The table below has various pieces of information:
**Name**: The name of the output parser
**Supports Streaming**: Whether the output parser supports streaming.
**Has Format Instructions**: Whether the output parser has format instructions. This is generally available except when (a) the desired schema is not specified in the prompt but rather in other parameters (like OpenAI function calling), or (b) when the OutputParser wraps another OutputParser.
**Calls LLM**: Whether this output parser itself calls an LLM. This is usually only done by output parsers that attempt to correct misformatted output.
**Input Type**: Expected input type. Most output parsers work on both strings and messages, but some (like OpenAI Functions) need a message with specific kwargs.
**Output Type**: The output type of the object returned by the parser.
**Description**: Our commentary on this output parser and when to use it.
- **Name**: The name of the output parser
- **Supports Streaming**: Whether the output parser supports streaming.
- **Has Format Instructions**: Whether the output parser has format instructions. This is generally available except when (a) the desired schema is not specified in the prompt but rather in other parameters (like OpenAI function calling), or (b) when the OutputParser wraps another OutputParser.
- **Calls LLM**: Whether this output parser itself calls an LLM. This is usually only done by output parsers that attempt to correct misformatted output.
- **Input Type**: Expected input type. Most output parsers work on both strings and messages, but some (like OpenAI Functions) need a message with specific kwargs.
- **Output Type**: The output type of the object returned by the parser.
- **Description**: Our commentary on this output parser and when to use it.
| Name | Supports Streaming | Has Format Instructions | Calls LLM | Input Type | Output Type | Description |
|-----------------|--------------------|-------------------------------|-----------|----------------------------------|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -534,10 +528,10 @@ Tools are needed whenever you want a model to control parts of your code or call
A tool consists of:
1. The name of the tool.
2. A description of what the tool does.
3. A JSON schema defining the inputs to the tool.
4. A function (and, optionally, an async variant of the function).
1. The `name` of the tool.
2. A `description` of what the tool does.
3. A `JSON schema` defining the inputs to the tool.
4. A `function` (and, optionally, an async variant of the function).
When a tool is bound to a model, the name, description and JSON schema are provided as context to the model.
Given a list of tools and a set of instructions, a model can request to call one or more tools with specific inputs.
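As a sketch, the `@tool` decorator builds a tool from a plain function, inferring the name, description, and input schema (the `magic_function` below mirrors the example used in the notebooks later in this changeset):

```python
from langchain_core.tools import tool

@tool
def magic_function(input: int) -> int:
    """Applies a magic function to an input."""
    return input + 2

print(magic_function.name)         # "magic_function"
print(magic_function.description)  # taken from the docstring
print(magic_function.args)         # JSON-schema-style description of the inputs

print(magic_function.invoke({"input": 3}))  # -> 5
```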
@@ -650,14 +644,14 @@ The results of those actions can then be fed back into the agent and it determin
[LangGraph](https://github.com/langchain-ai/langgraph) is an extension of LangChain specifically aimed at creating highly controllable and customizable agents.
Please check out that documentation for a more in depth overview of agent concepts.
There is a legacy agent concept in LangChain that we are moving towards deprecating: `AgentExecutor`.
There is a legacy `agent` concept in LangChain that we are moving towards deprecating: `AgentExecutor`.
AgentExecutor was essentially a runtime for agents.
It was a great place to get started, however, it was not flexible enough as you started to have more customized agents.
In order to solve that we built LangGraph to be this flexible, highly-controllable runtime.
If you are still using AgentExecutor, do not fear: we still have a guide on [how to use AgentExecutor](/docs/how_to/agent_executor).
It is recommended, however, that you start to transition to LangGraph.
In order to assist in this we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).
In order to assist in this, we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).
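A minimal sketch of the LangGraph replacement (the model name is only an example; `create_react_agent` takes the model and tools and operates on a list of messages rather than an `input` string):

```python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

@tool
def magic_function(input: int) -> int:
    """Applies a magic function to an input."""
    return input + 2

model = ChatOpenAI(model="gpt-4o")  # example model name
app = create_react_agent(model, [magic_function])

result = app.invoke({"messages": [("human", "what is the value of magic_function(3)?")]})
print(result["messages"][-1].content)
```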
#### ReAct agents
<span data-heading-keywords="react,react agent"></span>
@@ -743,7 +737,7 @@ callbacks to any child objects.
:::important Async in Python<=3.10
Any `RunnableLambda`, a `RunnableGenerator`, or `Tool` that invokes other runnables
and is running async in python<=3.10, will have to propagate callbacks to child
and is running `async` in python<=3.10, will have to propagate callbacks to child
objects manually. This is because LangChain cannot automatically propagate
callbacks to child objects in this case.
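A hedged sketch of the manual propagation pattern: the wrapped function accepts the `config` argument and passes it through to the child runnable so callbacks and tracing follow it on Python <= 3.10.

```python
import asyncio

from langchain_core.runnables import RunnableConfig, RunnableLambda

child = RunnableLambda(lambda x: x + 1)

async def parent(value: int, config: RunnableConfig) -> int:
    # On Python <= 3.10, pass the config through explicitly so callbacks
    # attached to the parent run reach the child run as well.
    return await child.ainvoke(value, config=config)

chain = RunnableLambda(parent)
print(asyncio.run(chain.ainvoke(1)))  # -> 2
```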
@@ -873,7 +867,7 @@ Furthermore, using tokens can also improve efficiency, since the model processes
### Function/tool calling
:::info
We use the term tool calling interchangeably with function calling. Although
We use the term `tool calling` interchangeably with `function calling`. Although
function calling is sometimes meant to refer to invocations of a single function,
we treat all models as though they can return multiple tool or function calls in
each message.
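A short sketch of binding tools to a chat model (the model name is only an example); note that a single response may carry several entries in `tool_calls`:

```python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

@tool
def add(a: int, b: int) -> int:
    """Adds two integers."""
    return a + b

llm_with_tools = ChatOpenAI(model="gpt-4o-mini").bind_tools([add])  # example model name
ai_msg = llm_with_tools.invoke("What is 11 + 49? Also, what is 3 + 4?")

# The model can request multiple tool calls in one message.
print(ai_msg.tool_calls)
```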
@@ -968,7 +962,6 @@ structured_llm.invoke("Tell me a joke about cats")
```
Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=None)
```
We recommend this method as a starting point when working with structured output:
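A sketch of the `.with_structured_output()` call that produces the `Joke` result shown above (assuming `langchain-openai`; the model name is only an example):

```python
from typing import Optional

from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI

class Joke(BaseModel):
    """Joke to tell the user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating: Optional[int] = Field(default=None, description="How funny, from 1 to 10")

structured_llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about cats")
# -> Joke(setup='...', punchline='...', rating=None)
```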
@@ -1107,7 +1100,11 @@ For a full list of model providers that support tool calling, [see this table](/
### Few-shot prompting
One of the most effective ways to improve model performance is to give a model examples of what you want it to do. The technique of adding example inputs and expected outputs to a model prompt is known as "few-shot prompting". There are a few things to think about when doing few-shot prompting:
One of the most effective ways to improve model performance is to give a model examples of
what you want it to do. The technique of adding example inputs and expected outputs
to a model prompt is known as "few-shot prompting". The technique is based on the
[Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) paper.
There are a few things to think about when doing few-shot prompting:
1. How are examples generated?
2. How many examples are in each prompt?
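A sketch of one answer to these questions, using a small fixed set of examples formatted as alternating human/AI messages (the arithmetic examples are illustrative):

```python
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

examples = [
    {"input": "2 + 2", "output": "4"},
    {"input": "2 + 3", "output": "5"},
]
example_prompt = ChatPromptTemplate.from_messages(
    [("human", "{input}"), ("ai", "{output}")]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a wizard of math."),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)
print(final_prompt.invoke({"input": "What is 2 + 9?"}).to_messages())
```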
@@ -1182,8 +1179,10 @@ You can see a case study of how Anthropic and OpenAI respond to different few-sh
### Retrieval
LLMs are trained on a large but fixed dataset, limiting their ability to reason over private or recent information. Fine-tuning an LLM with specific facts is one way to mitigate this, but is often [poorly suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise).
Retrieval is the process of providing relevant information to an LLM to improve its response for a given input. Retrieval augmented generation (RAG) is the process of grounding the LLM generation (output) using the retrieved information.
LLMs are trained on a large but fixed dataset, limiting their ability to reason over private or recent information.
Fine-tuning an LLM with specific facts is one way to mitigate this, but is often [poorly suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise).
`Retrieval` is the process of providing relevant information to an LLM to improve its response for a given input.
`Retrieval augmented generation` (`RAG`) ([paper](https://arxiv.org/abs/2005.11401)) is the process of grounding the LLM generation (output) using the retrieved information.
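A minimal RAG sketch to ground the terms above (it assumes `langchain-openai`, an API key, and a recent `langchain-core` that ships `InMemoryVectorStore`; the document text and model name are examples):

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

vectorstore = InMemoryVectorStore.from_texts(
    ["LangChain was released in 2022."],  # example corpus
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

prompt = ChatPromptTemplate.from_template(
    "Answer using only this context:\n{context}\n\nQuestion: {question}"
)

# Retrieval supplies the context; generation is grounded in it (RAG).
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI(model="gpt-4o-mini")  # example model name
    | StrOutputParser()
)
print(chain.invoke("When was LangChain released?"))
```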
:::tip
@@ -1203,12 +1202,12 @@ First, consider the user input(s) to your RAG system. Ideally, a RAG system can
**Using an LLM to review and optionally modify the input is the central idea behind query translation.** This serves as a general buffer, optimizing raw user inputs for your retrieval system.
For example, this can be as simple as extracting keywords or as complex as generating multiple sub-questions for a complex query.
| Name | When to use | Description |
|---------------|-------------|-------------|
| Name | When to use | Description |
|---------------|-------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [Multi-query](/docs/how_to/MultiQueryRetriever/) | When you need to cover multiple perspectives of a question. | Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, return the unique documents for all queries. |
| [Decomposition](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a question can be broken down into smaller subproblems. | Decompose a question into a set of subproblems / questions, which can either be solved sequentially (use the answer from first + retrieval to answer the second) or in parallel (consolidate each answer into final answer). |
| [Step-back](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a higher-level conceptual understanding is required. | First prompt the LLM to ask a generic step-back question about higher-level concepts or principles, and retrieve relevant facts about them. Use this grounding to help answer the user question. |
| [HyDE](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | If you have challenges retrieving relevant documents using the raw user inputs. | Use an LLM to convert questions into hypothetical documents that answer the question. Use the embedded hypothetical documents to retrieve real documents with the premise that doc-doc similarity search can produce more relevant matches. |
| [Decomposition](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a question can be broken down into smaller subproblems. | Decompose a question into a set of subproblems / questions, which can either be solved sequentially (use the answer from first + retrieval to answer the second) or in parallel (consolidate each answer into final answer). |
| [Step-back](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a higher-level conceptual understanding is required. | First prompt the LLM to ask a generic step-back question about higher-level concepts or principles, and retrieve relevant facts about them. Use this grounding to help answer the user question. [Paper](https://arxiv.org/pdf/2310.06117). |
| [HyDE](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | If you have challenges retrieving relevant documents using the raw user inputs. | Use an LLM to convert questions into hypothetical documents that answer the question. Use the embedded hypothetical documents to retrieve real documents with the premise that doc-doc similarity search can produce more relevant matches. [Paper](https://arxiv.org/abs/2212.10496). |
:::tip
@@ -1282,11 +1281,11 @@ Fifth, consider ways to improve the quality of your similarity search itself. Em
There are some additional tricks to improve the quality of your retrieval. Embeddings excel at capturing semantic information, but may struggle with keyword-based queries. Many [vector stores](/docs/integrations/retrievers/pinecone_hybrid_search/) offer built-in [hybrid-search](https://docs.pinecone.io/guides/data/understanding-hybrid-search) to combine keyword and semantic similarity, which marries the benefits of both approaches. Furthermore, many vector stores have [maximal marginal relevance](https://python.langchain.com/v0.1/docs/modules/model_io/prompts/example_selectors/mmr/), which attempts to diversify the results of a search to avoid returning similar and redundant documents.
| Name | When to use | Description |
|-------------------|----------------------------------------------------------|-------------|
| [ColBERT](/docs/integrations/providers/ragatouille/#using-colbert-as-a-reranker) | When higher granularity embeddings are needed. | ColBERT uses contextually influenced embeddings for each token in the document and query to get a granular query-document similarity score. |
| [Hybrid search](/docs/integrations/retrievers/pinecone_hybrid_search/) | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches. |
| [Maximal Marginal Relevance (MMR)](/docs/integrations/vectorstores/pinecone/#maximal-marginal-relevance-searches) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |
| Name | When to use | Description |
|-------------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [ColBERT](/docs/integrations/providers/ragatouille/#using-colbert-as-a-reranker) | When higher granularity embeddings are needed. | ColBERT uses contextually influenced embeddings for each token in the document and query to get a granular query-document similarity score. [Paper](https://arxiv.org/abs/2112.01488). |
| [Hybrid search](/docs/integrations/retrievers/pinecone_hybrid_search/) | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches. [Paper](https://arxiv.org/abs/2210.11934). |
| [Maximal Marginal Relevance (MMR)](/docs/integrations/vectorstores/pinecone/#maximal-marginal-relevance-searches) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |
:::tip
@@ -1306,7 +1305,7 @@ Sixth, consider ways to filter or rank retrieved documents. This is very useful
:::tip
See our RAG from Scratch video on [RAG-Fusion](https://youtu.be/77qELPbNgxA?feature=shared), an approach for post-processing across multiple queries: Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, and combine the ranks of multiple search result lists to produce a single, unified ranking with [Reciprocal Rank Fusion (RRF)](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1).
See our RAG from Scratch video on [RAG-Fusion](https://youtu.be/77qELPbNgxA?feature=shared) ([paper](https://arxiv.org/abs/2402.03367)), an approach for post-processing across multiple queries: Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, and combine the ranks of multiple search result lists to produce a single, unified ranking with [Reciprocal Rank Fusion (RRF)](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1).
:::

View File

@@ -24,3 +24,16 @@ for more information.
Notably, Github doesn't allow this setting to be enabled for forks in **organizations** ([issue](https://github.com/orgs/community/discussions/5634)).
If you are working in an organization, we recommend submitting your PR from a personal
fork in order to enable this setting.
### Why hasn't my PR been reviewed?
Please reference our [Review Process](/docs/contributing/review_process/).
### Why was my PR closed?
Please reference our [Review Process](/docs/contributing/review_process/).
### I think my PR was closed in a way that didn't follow the review process. What should I do?
Tag `@efriis` in the PR comments referencing the portion of the review
process that you believe was not followed. We'll take a look!

View File

@@ -0,0 +1,95 @@
# Review Process
## Overview
This document outlines the process used by the LangChain maintainers for reviewing pull requests (PRs). The primary objective of this process is to enhance the LangChain developer experience.
## Review Statuses
We categorize PRs using three main statuses, which are marked as project item statuses in the right sidebar and can be viewed in detail [here](https://github.com/orgs/langchain-ai/projects/12/views/1).
- **Triage**:
- Initial status for all newly submitted PRs.
- Requires a maintainer to categorize it into one of the other statuses.
- **Needs Support**:
- PRs that require community feedback or additional input before moving forward.
- Automatically promoted to the backlog if it receives 5 upvotes.
- An auto-comment is generated when this status is applied, explaining the flow and the upvote requirement.
- If the PR remains in this status for 25 days, it will be marked as “stale” via auto-comment.
- PRs will be auto-closed after 30 days if no further action is taken.
- **In Review**:
- PRs that are actively under review by our team.
- These are regularly reviewed and monitored.
**Note:** A PR may only have one status at a time.
**Note:** You may notice 3 additional statuses of Done, Closed, and Internal that
are external to this lifecycle. Done and Closed PRs have been merged or closed,
respectively. Internal is for PRs submitted by core maintainers, and these PRs are owned
by the submitter.
## Review Guidelines
1. **PRs that touch /libs/core**:
- PRs that directly impact core code and are likely to affect end users.
- **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
- These PRs are given top priority and are reviewed the fastest.
- PRs that don't have a **concise** description of their motivation (either in the PR summary or in a linked issue) are likely to be closed without an in-depth review. Please do not generate verbose PR descriptions with an LLM.
- PRs that don't have unit tests are likely to be closed.
- Feature requests should first be opened as a GitHub issue and discussed with the LangChain maintainers. Large PRs submitted without prior discussion are likely to be closed.
2. **PRs that touch /libs/langchain**:
- High-impact PRs that are closely related to core PRs but slightly lower in priority.
- **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
- These are reviewed and closed aggressively, similar to core PRs.
- New feature requests should be discussed with the core maintainer team beforehand in an issue.
3. **PRs that touch /libs/partners/\***:
- PRs involving integration packages.
- **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
- The review may be conducted by our team or handed off to the partner's development team, depending on the PR's content.
- We maintain communication lines with most partner dev teams to facilitate this process.
4. **Community PRs**:
- Most community PRs will get an initial status of "needs support".
- **Triage Guideline**: most PRs should go to `Needs support`. Bugfixes on high-traffic integrations should go straight to `In review`.
- **Triage Guideline**: all new features and integrations should go to `Needs support` and will be closed if they do not get enough support (measured by upvotes or comments).
- PRs in the `Needs Support` status for 20 days are marked as “stale” and will be closed after 30 days if no action is taken.
5. **Documentation PRs**:
- PRs that touch the documentation content in docs/docs.
- **Triage Guideline**:
- PRs that fix typos or small errors in a single file and pass CI should go straight to `In Review`.
- PRs that make changes that have been discussed and agreed upon in an issue should go straight to `In Review`.
- PRs that add new pages or change the structure of the documentation should go to `Needs Support`.
- We strive to standardize documentation formats to streamline the review process.
- CI jobs run against documentation to ensure adherence to standards, automating much of the review.
6. **PRs must be in English**:
- PRs that are not in English will be closed without review.
- This is to ensure that all maintainers can review the PRs effectively.
## How to see a PR's status
See screenshot:
![PR Status](/img/review_process_status.png)
*To see the status of all open PRs, please visit the [LangChain Project Board](https://github.com/orgs/langchain-ai/projects/12/views/2).*
## Review Prioritization
Our goal is to provide the best possible development experience by focusing on making software that:
- Works: Works as intended (is bug-free).
- Is useful: Improves LLM app development with components that work off-the-shelf and runtimes that simplify app building.
- Is easy: Is intuitive to use and well-documented.
We believe this process reflects our priorities and are open to feedback if you feel it does not.
## Github Discussion
We welcome your feedback on this process. Please feel free to add a comment in
[this GitHub Discussion](https://github.com/langchain-ai/langchain/discussions/25920).

View File

@@ -9,12 +9,12 @@
"\n",
"When constructing an agent, you will need to provide it with a list of `Tool`s that it can use. Besides the actual function that is called, the Tool consists of several components:\n",
"\n",
"| Attribute | Type | Description |\n",
"|-----------------|---------------------------|------------------------------------------------------------------------------------------------------------------|\n",
"| name | str | Must be unique within a set of tools provided to an LLM or agent. |\n",
"| description | str | Describes what the tool does. Used as context by the LLM or agent. |\n",
"| args_schema | Pydantic BaseModel | Optional but recommended, can be used to provide more information (e.g., few-shot examples) or validation for expected parameters |\n",
"| return_direct | boolean | Only relevant for agents. When True, after invoking the given tool, the agent will stop and return the result direcly to the user. |\n",
"| Attribute | Type | Description |\n",
"|---------------|---------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n",
"| name | str | Must be unique within a set of tools provided to an LLM or agent. |\n",
"| description | str | Describes what the tool does. Used as context by the LLM or agent. |\n",
"| args_schema | langchain.pydantic_v1.BaseModel | Optional but recommended, and required if using callback handlers. It can be used to provide more information (e.g., few-shot examples) or validation for expected parameters. |\n",
"| return_direct | boolean | Only relevant for agents. When True, after invoking the given tool, the agent will stop and return the result direcly to the user. |\n",
"\n",
"LangChain supports the creation of tools from:\n",
"\n",

View File

@@ -82,7 +82,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "1e425fea-2796-4b99-bee6-9a6ffe73f756",
"metadata": {},
"outputs": [],
@@ -115,7 +115,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "03ea357c-9c36-4464-b2cc-27bd150e1554",
"metadata": {},
"outputs": [
@@ -126,7 +126,7 @@
" 'output': 'The value of `magic_function(3)` is 5.'}"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -162,7 +162,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "53a3737a-d167-4255-89bf-20ac37f89a3e",
"metadata": {},
"outputs": [
@@ -173,7 +173,7 @@
" 'output': 'The value of `magic_function(3)` is 5.'}"
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -193,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "74ecebe3-512e-409c-a661-bdd5b0a2b782",
"metadata": {},
"outputs": [
@@ -201,10 +201,10 @@
"data": {
"text/plain": [
"{'input': 'Pardon?',\n",
" 'output': 'The value you get when you apply `magic_function` to the input 3 is 5.'}"
" 'output': 'The value returned by `magic_function` when the input is 3 is 5.'}"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -243,7 +243,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "a9a11ccd-75e2-4c11-844d-a34870b0ff91",
"metadata": {},
"outputs": [
@@ -254,7 +254,7 @@
" 'output': 'El valor de `magic_function(3)` es 5.'}"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -295,7 +295,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "a9486805-676a-4d19-a5c4-08b41b172989",
"metadata": {},
"outputs": [],
@@ -324,7 +324,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "d369ab45-0c82-45f4-9d3e-8efb8dd47e2c",
"metadata": {},
"outputs": [
@@ -332,7 +332,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'input': 'what is the value of magic_function(3)?', 'output': 'El valor de magic_function(3) es 5. ¡Pandamonium!'}\n"
"{'input': 'what is the value of magic_function(3)?', 'output': 'The value of magic_function(3) is 5. ¡Pandamonium!'}\n"
]
}
],
@@ -386,7 +386,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "b97beba5-8f74-430c-9399-91b77c8fa15c",
"metadata": {},
"outputs": [
@@ -394,11 +394,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Hi Polly! The output of the magic function for the input 3 is 5.\n",
"Hi Polly! The output of applying the magic function to the input 3 is 5.\n",
"---\n",
"Yes, your name is Polly!\n",
"Yes, you mentioned your name is Polly.\n",
"---\n",
"The output of the magic function for the input 3 is 5.\n"
"The output of applying the magic function to the input 3 is 5.\n"
]
}
],
@@ -476,7 +476,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "baca3dc6-678b-4509-9275-2fd653102898",
"metadata": {},
"outputs": [
@@ -484,16 +484,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Hi Polly! The output of the magic_function for the input of 3 is 5.\n",
"Hi Polly! The output of applying the magic function to the input 3 is 5.\n",
"---\n",
"Yes, your name is Polly!\n",
"---\n",
"The output of the magic_function for the input of 3 was 5.\n"
"The output of applying the magic function to the input 3 was 5.\n"
]
}
],
"source": [
"from langgraph.checkpoint import MemorySaver # an in-memory checkpointer\n",
"from langgraph.checkpoint.memory import MemorySaver # an in-memory checkpointer\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"system_message = \"You are a helpful assistant.\"\n",
@@ -544,7 +544,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "e62843c4-1107-41f0-a50b-aea256e28053",
"metadata": {},
"outputs": [
@@ -552,8 +552,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'actions': [ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_1exy0rScfPmo4fy27FbQ5qJ2')], 'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])]}\n",
"{'steps': [AgentStep(action=ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_1exy0rScfPmo4fy27FbQ5qJ2'), observation=5)], 'messages': [FunctionMessage(content='5', name='magic_function')]}\n",
"{'actions': [ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_gNzQT96XWoyZqVl1jI1yMnjy')], 'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])]}\n",
"{'steps': [AgentStep(action=ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_gNzQT96XWoyZqVl1jI1yMnjy'), observation=5)], 'messages': [FunctionMessage(content='5', name='magic_function')]}\n",
"{'output': 'The value of `magic_function(3)` is 5.', 'messages': [AIMessage(content='The value of `magic_function(3)` is 5.')]}\n"
]
}
@@ -604,7 +604,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"id": "076ebc85-f804-4093-a25a-a16334c9898e",
"metadata": {},
"outputs": [
@@ -612,9 +612,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_my9rzFSKR4T1yYKwCsfbZB8A', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 61, 'total_tokens': 75}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_bc2a86f5f5', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-dd705555-8fae-4fb1-a033-5d99a23e3c22-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_my9rzFSKR4T1yYKwCsfbZB8A', 'type': 'tool_call'}], usage_metadata={'input_tokens': 61, 'output_tokens': 14, 'total_tokens': 75})]}}\n",
"{'tools': {'messages': [ToolMessage(content='5', name='magic_function', tool_call_id='call_my9rzFSKR4T1yYKwCsfbZB8A')]}}\n",
"{'agent': {'messages': [AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 84, 'total_tokens': 98}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-698cad05-8cb2-4d08-8c2a-881e354f6cc7-0', usage_metadata={'input_tokens': 84, 'output_tokens': 14, 'total_tokens': 98})]}}\n"
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_I0nztlIcc0e9ry5dn53YLZUM', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 61, 'total_tokens': 75}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-5f9bd87d-3692-4d13-8d27-1859e13e2156-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_I0nztlIcc0e9ry5dn53YLZUM', 'type': 'tool_call'}], usage_metadata={'input_tokens': 61, 'output_tokens': 14, 'total_tokens': 75})]}}\n",
"{'tools': {'messages': [ToolMessage(content='5', name='magic_function', tool_call_id='call_I0nztlIcc0e9ry5dn53YLZUM')]}}\n",
"{'agent': {'messages': [AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 84, 'total_tokens': 98}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-f6015ca6-93e5-45e8-8b28-b3f0a8d203dc-0', usage_metadata={'input_tokens': 84, 'output_tokens': 14, 'total_tokens': 98})]}}\n"
]
}
],
@@ -654,7 +654,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 14,
"id": "a2f720f3-c121-4be2-b498-92c16bb44b0a",
"metadata": {},
"outputs": [
@@ -662,7 +662,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-a792db4a-278d-4090-82ae-904a30eada93', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_uPZ2D1Bo5mdED3gwgaeWURrf'), 5)]\n"
"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-99e06b70-1ef6-4761-834b-87b6c5252e20', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_wjaAyTjI2LSYOq7C8QZYSxEs'), 5)]\n"
]
}
],
@@ -684,20 +684,20 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 15,
"id": "ef23117a-5ccb-42ce-80c3-ea49a9d3a942",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='cd7d0f49-a0e0-425a-b2b0-603a716058ed'),\n",
" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_VfZ9287DuybOSrBsQH5X12xf', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a1e965cd-bf61-44f9-aec1-8aaecb80955f-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_VfZ9287DuybOSrBsQH5X12xf', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}),\n",
" ToolMessage(content='5', name='magic_function', id='20d5c2fe-a5d8-47fa-9e04-5282642e2039', tool_call_id='call_VfZ9287DuybOSrBsQH5X12xf'),\n",
" AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 78, 'total_tokens': 92}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-abf9341c-ef41-4157-935d-a3be5dfa2f41-0', usage_metadata={'input_tokens': 78, 'output_tokens': 14, 'total_tokens': 92})]}"
"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='2d369331-8052-4167-bd85-9f6d8ad021ae'),\n",
" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_oXiSQSe6WeWj7XIKXxZrO2IC', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-297e7fc9-726f-46a0-8c67-dc28ed1724d0-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_oXiSQSe6WeWj7XIKXxZrO2IC', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}),\n",
" ToolMessage(content='5', name='magic_function', id='46370faf-9598-423c-b94b-aca8cb4f035d', tool_call_id='call_oXiSQSe6WeWj7XIKXxZrO2IC'),\n",
" AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 78, 'total_tokens': 92}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-f48efaff-0c2c-4632-bbf9-7ee626f73d02-0', usage_metadata={'input_tokens': 78, 'output_tokens': 14, 'total_tokens': 92})]}"
]
},
"execution_count": 13,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -757,7 +757,7 @@
"Invoking: `magic_function` with `{'input': '3'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3mParece que hubo un error al intentar calcular el valor de la función mágica. ¿Te gustaría que lo intente de nuevo?\u001b[0m\n",
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3mHubo un error al intentar obtener el valor de `magic_function(3)`. ¿Podrías intentarlo de nuevo o proporcionar más detalles?\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@@ -766,7 +766,7 @@
"data": {
"text/plain": [
"{'input': 'what is the value of magic_function(3)?',\n",
" 'output': 'Parece que hubo un error al intentar calcular el valor de la función mágica. ¿Te gustaría que lo intente de nuevo?'}"
" 'output': 'Hubo un error al intentar obtener el valor de `magic_function(3)`. ¿Podrías intentarlo de nuevo o proporcionar más detalles?'}"
]
},
"execution_count": 17,
@@ -819,12 +819,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"content='what is the value of magic_function(3)?' id='74e2d5e8-2b59-4820-979c-8d11ecfc14c2'\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_ihtrH6IG95pDXpKluIwAgi3J', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-5a35e465-8a08-43dd-ac8b-4a76dcace305-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_ihtrH6IG95pDXpKluIwAgi3J', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='8c37c19b-3586-46b1-aab9-a045786801a2' tool_call_id='call_ihtrH6IG95pDXpKluIwAgi3J'\n",
"content='It seems there was an error in processing the request. Let me try again.' additional_kwargs={'tool_calls': [{'id': 'call_iF0vYWAd6rfely0cXSqdMOnF', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 88, 'total_tokens': 119}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-eb88ec77-d492-43a5-a5dd-4cefef9a6920-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_iF0vYWAd6rfely0cXSqdMOnF', 'type': 'tool_call'}] usage_metadata={'input_tokens': 88, 'output_tokens': 31, 'total_tokens': 119}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='c9ff261f-a0f1-4c92-a9f2-cd749f62d911' tool_call_id='call_iF0vYWAd6rfely0cXSqdMOnF'\n",
"content='I am currently unable to process the request with the input \"3\" for the `magic_function`. If you have any other questions or need assistance with something else, please let me know!' response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 141, 'total_tokens': 180}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None} id='run-d42508aa-f286-4b57-80fb-f8a76736d470-0' usage_metadata={'input_tokens': 141, 'output_tokens': 39, 'total_tokens': 180}\n"
"content='what is the value of magic_function(3)?' id='fe74bb30-45b8-4a40-a5ed-fd6678da5428'\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_TNKfNy6fgZNdJAvHUMXwtp8f', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-dad8bfc1-477c-40d2-9016-243d25c0dd13-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_TNKfNy6fgZNdJAvHUMXwtp8f', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='653226e0-3187-40be-a774-4c7c2612239e' tool_call_id='call_TNKfNy6fgZNdJAvHUMXwtp8f'\n",
"content='It looks like there was an issue with processing the request. Let me try that again.' additional_kwargs={'tool_calls': [{'id': 'call_K0wJ8fQLYGv8fYXY1Uo5U5sG', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 88, 'total_tokens': 121}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-d4c85437-6625-4e57-81f9-86de6842be7b-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_K0wJ8fQLYGv8fYXY1Uo5U5sG', 'type': 'tool_call'}] usage_metadata={'input_tokens': 88, 'output_tokens': 33, 'total_tokens': 121}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='9b530d03-95df-401e-bb4f-5cada1195033' tool_call_id='call_K0wJ8fQLYGv8fYXY1Uo5U5sG'\n",
"content='It seems that there is a persistent issue with processing the request. Let me attempt it one more time.' additional_kwargs={'tool_calls': [{'id': 'call_7ECwwNBDo4SH56oczErZJVRT', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 143, 'total_tokens': 179}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-9f3f651e-a641-4112-99ed-d1ac11169582-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_7ECwwNBDo4SH56oczErZJVRT', 'type': 'tool_call'}] usage_metadata={'input_tokens': 143, 'output_tokens': 36, 'total_tokens': 179}\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='e4cd152b-4eb1-47df-ac76-f88e79adbe19' tool_call_id='call_7ECwwNBDo4SH56oczErZJVRT'\n",
"content=\"It seems there is a consistent issue with processing the request for the magic function. Let's try using a different approach to resolve this.\" additional_kwargs={'tool_calls': [{'id': 'call_DMAL0UwBRijzuPjCTSwR2r17', 'function': {'arguments': '{\"input\":\"three\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 41, 'prompt_tokens': 201, 'total_tokens': 242}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-cd9f4e5c-f881-462c-abe3-890e73f46a01-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 'three'}, 'id': 'call_DMAL0UwBRijzuPjCTSwR2r17', 'type': 'tool_call'}] usage_metadata={'input_tokens': 201, 'output_tokens': 41, 'total_tokens': 242}\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
]
}
],
@@ -939,9 +942,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_FKiTkTd0Ffd4rkYSzERprf1M', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-b842f7b6-ec10-40f8-8c0e-baa220b77e91-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_FKiTkTd0Ffd4rkYSzERprf1M', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_o8Ym0u9UfzArhIm1lV7O0CXF', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d9faf125-1ff8-4de2-a75b-97e07d28dc4d-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_o8Ym0u9UfzArhIm1lV7O0CXF', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"------\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to a step timeout.'}\n"
]
}
],
@@ -957,7 +960,7 @@
" print(chunk)\n",
" print(\"------\")\n",
"except TimeoutError:\n",
" print({\"input\": query, \"output\": \"Agent stopped due to max iterations.\"})"
" print({\"input\": query, \"output\": \"Agent stopped due to a step timeout.\"})"
]
},
{
@@ -978,7 +981,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_WoOB8juagB08xrP38twYlYKR', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-73dee47e-30ab-42c9-bb0c-6f227cac96cd-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_WoOB8juagB08xrP38twYlYKR', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_gsGzyhyvR25iNV6W9VR2TIdQ', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-9ad8f834-06c5-41cf-9eec-6b7e0f5e777e-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_gsGzyhyvR25iNV6W9VR2TIdQ', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"------\n",
"Task Cancelled.\n"
]
@@ -1089,10 +1092,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"content='what is the value of magic_function(3)?' id='4fa7fbe5-758c-47a3-9268-717665d10680'\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_ujE0IQBbIQnxcF9gsZXQfdhF', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-65d689aa-baee-4342-a5d2-048feefab418-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_ujE0IQBbIQnxcF9gsZXQfdhF', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
"content='Sorry there was an error, please try again.' name='magic_function' id='ef8ddf1d-9ad7-4ac0-b784-b673c4d94bbd' tool_call_id='call_ujE0IQBbIQnxcF9gsZXQfdhF'\n",
"content='It seems there was an issue with the previous attempt. Let me try that again.' additional_kwargs={'tool_calls': [{'id': 'call_GcsAfCFUHJ50BN2IOWnwTbQ7', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 87, 'total_tokens': 119}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-54527c4b-8ff0-4ee8-8abf-224886bd222e-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_GcsAfCFUHJ50BN2IOWnwTbQ7', 'type': 'tool_call'}] usage_metadata={'input_tokens': 87, 'output_tokens': 32, 'total_tokens': 119}\n",
"content='what is the value of magic_function(3)?' id='6487a942-0a9a-4e8a-9556-553a45fa9c5a'\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_pe5KVY5No9iT4JWqrm5MwL1D', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-04147325-fb72-462a-a1d9-6aa4e86e3d8a-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_pe5KVY5No9iT4JWqrm5MwL1D', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
"content='Sorry there was an error, please try again.' name='magic_function' id='bc0bf58f-7c6c-42ed-a96d-a2afa79f16a9' tool_call_id='call_pe5KVY5No9iT4JWqrm5MwL1D'\n",
"content=\"It seems there was an issue with processing the request. I'll try again.\" additional_kwargs={'tool_calls': [{'id': 'call_5rV7k3g7oW38bD9KUTsSxK8l', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 87, 'total_tokens': 117}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-6e43ffd4-fb6f-4222-8503-a50ae268c0be-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_5rV7k3g7oW38bD9KUTsSxK8l', 'type': 'tool_call'}] usage_metadata={'input_tokens': 87, 'output_tokens': 30, 'total_tokens': 117}\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
]
}
@@ -1322,7 +1325,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.11.2"
}
},
"nbformat": 4,

View File

@@ -14,7 +14,7 @@
"- [How to use a model to call tools](/docs/how_to/tool_calling)\n",
":::\n",
"\n",
"In order to force our LLM to spelect a specific tool, we can use the `tool_choice` parameter to ensure certain behavior. First, let's define our model and tools:"
"In order to force our LLM to select a specific tool, we can use the `tool_choice` parameter to ensure certain behavior. First, let's define our model and tools:"
]
},
{
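For reference, a minimal sketch of the `tool_choice` pattern described in that cell (the chat model and the `multiply` tool below are illustrative assumptions, not from the original notebook):

```python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


llm = ChatOpenAI(model="gpt-4o-mini")  # any tool-calling chat model should work similarly
forced = llm.bind_tools([multiply], tool_choice="multiply")

msg = forced.invoke("what is 2 + 3?")
print(msg.tool_calls)  # forced to call `multiply` even though the question asks for addition
```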

View File

@@ -0,0 +1,423 @@
{
"cells": [
{
"cell_type": "raw",
"id": "afaf8039",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Cerebras\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "e49f1e0d",
"metadata": {},
"source": [
"# ChatCerebras\n",
"\n",
"This notebook provides a quick overview for getting started with Cerebras [chat models](/docs/concepts/#chat-models). For detailed documentation of all ChatCerebras features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_cerebras.chat_models.ChatCerebras.html).\n",
"\n",
"At Cerebras, we've developed the world's largest and fastest AI processor, the Wafer-Scale Engine-3 (WSE-3). The Cerebras CS-3 system, powered by the WSE-3, represents a new class of AI supercomputer that sets the standard for generative AI training and inference with unparalleled performance and scalability.\n",
"\n",
"With Cerebras as your inference provider, you can:\n",
"- Achieve unprecedented speed for AI inference workloads\n",
"- Build commercially with high throughput\n",
"- Effortlessly scale your AI workloads with our seamless clustering technology\n",
"\n",
"Our CS-3 systems can be quickly and easily clustered to create the largest AI supercomputers in the world, making it simple to place and run the largest models. Leading corporations, research institutions, and governments are already using Cerebras solutions to develop proprietary models and train popular open-source models.\n",
"\n",
"Want to experience the power of Cerebras? Check out our [website](https://cerebras.ai) for more resources and explore options for accessing our technology through the Cerebras Cloud or on-premise deployments!\n",
"\n",
"For more information about Cerebras Cloud, visit [cloud.cerebras.ai](https://cloud.cerebras.ai/). Our API reference is available at [inference-docs.cerebras.ai](https://inference-docs.cerebras.ai/).\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/chat/cerebras) | Package downloads | Package latest |\n",
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
"| [ChatCerebras](https://api.python.langchain.com/en/latest/chat_models/langchain_cerebras.chat_models.ChatCerebras.html) | [langchain-cerebras](https://api.python.langchain.com/en/latest/cerebras_api_reference.html) | ❌ | beta | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-cerebras?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-cerebras?style=flat-square&label=%20) |\n",
"\n",
"### Model features\n",
"| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
"| ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"```bash\n",
"pip install langchain-cerebras\n",
"```\n",
"\n",
"### Credentials\n",
"\n",
"Get an API Key from [cloud.cerebras.ai](https://cloud.cerebras.ai/) and add it to your environment variables:\n",
"```\n",
"export CEREBRAS_API_KEY=\"your-api-key-here\"\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ce19c2d6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Enter your Cerebras API key: ········\n"
]
}
],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"CEREBRAS_API_KEY\"] = getpass.getpass(\"Enter your Cerebras API key: \")"
]
},
{
"cell_type": "markdown",
"id": "72ee0c4b-9764-423a-9dbf-95129e185210",
"metadata": {},
"source": [
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"The LangChain Cerebras integration lives in the `langchain-cerebras` package:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-cerebras"
]
},
{
"cell_type": "markdown",
"id": "ea69675d",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our model object and generate chat completions:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21155898",
"metadata": {},
"outputs": [],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")"
]
},
{
"cell_type": "markdown",
"id": "2b4f3e15",
"metadata": {},
"source": [
"## Invocation"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "62e0dbc3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='Je adore le programmation.', response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 35, 'total_tokens': 42}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_be27ec77ff', 'finish_reason': 'stop'}, id='run-e5d66faf-019c-4ac6-9265-71093b13202d-0', usage_metadata={'input_tokens': 35, 'output_tokens': 7, 'total_tokens': 42})"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"messages = [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
" ),\n",
" (\"human\", \"I love programming.\"),\n",
"]\n",
"ai_msg = llm.invoke(messages)\n",
"ai_msg"
]
},
{
"cell_type": "markdown",
"id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
"metadata": {},
"source": [
"## Chaining\n",
"\n",
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='Ich liebe Programmieren!\\n\\n(Literally: I love programming!)', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 30, 'total_tokens': 44}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_be27ec77ff', 'finish_reason': 'stop'}, id='run-e1d2ebb8-76d1-471b-9368-3b68d431f16a-0', usage_metadata={'input_tokens': 30, 'output_tokens': 14, 'total_tokens': 44})"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
" ),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | llm\n",
"chain.invoke(\n",
" {\n",
" \"input_language\": \"English\",\n",
" \"output_language\": \"German\",\n",
" \"input\": \"I love programming.\",\n",
" }\n",
")"
]
},
{
"cell_type": "markdown",
"id": "0ec73a0e",
"metadata": {},
"source": [
"## Streaming"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "46fd21a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OH BOY! Let me tell you all about LIONS!\n",
"\n",
"Lions are the kings of the jungle! They're really big and have beautiful, fluffy manes around their necks. The mane is like a big, golden crown!\n",
"\n",
"Lions live in groups called prides. A pride is like a big family, and the lionesses (that's what we call the female lions) take care of the babies. The lionesses are like the mommies, and they teach the babies how to hunt and play.\n",
"\n",
"Lions are very good at hunting. They work together to catch their food, like zebras and antelopes. They're super fast and can run really, really fast!\n",
"\n",
"But lions are also very sleepy. They like to take long naps in the sun, and they can sleep for up to 20 hours a day! Can you imagine sleeping that much?\n",
"\n",
"Lions are also very loud. They roar really loudly to talk to each other. It's like they're saying, \"ROAR! I'm the king of the jungle!\"\n",
"\n",
"And guess what? Lions are very social. They like to play and cuddle with each other. They're like big, furry teddy bears!\n",
"\n",
"So, that's lions! Aren't they just the coolest?"
]
}
],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")\n",
"\n",
"system = \"You are an expert on animals who must answer questions in a manner that a 5 year old can understand.\"\n",
"human = \"I want to learn more about this animal: {animal}\"\n",
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
"\n",
"chain = prompt | llm\n",
"\n",
"for chunk in chain.stream({\"animal\": \"Lion\"}):\n",
" print(chunk.content, end=\"\", flush=True)"
]
},
{
"cell_type": "markdown",
"id": "f67b6132",
"metadata": {},
"source": [
"## Async"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "a3a45baf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='Ice', response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 36, 'total_tokens': 38}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_be27ec77ff', 'finish_reason': 'stop'}, id='run-7434bdde-1bec-44cf-827b-8d978071dfe8-0', usage_metadata={'input_tokens': 36, 'output_tokens': 2, 'total_tokens': 38})"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"human\",\n",
" \"Let's play a game of opposites. What's the opposite of {topic}? Just give me the answer with no extra input.\",\n",
" )\n",
" ]\n",
")\n",
"chain = prompt | llm\n",
"await chain.ainvoke({\"topic\": \"fire\"})"
]
},
{
"cell_type": "markdown",
"id": "4f9d9945",
"metadata": {},
"source": [
"## Async Streaming"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "c7448e0f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In the distant reaches of the cosmos, there existed a peculiar phenomenon known as the \"Eclipse of Eternity,\" a swirling vortex of darkness that had been shrouded in mystery for eons. It was said that this blackhole, born from the cataclysmic collision of two ancient stars, had been slowly devouring the fabric of space-time itself, warping the very essence of reality. As the celestial bodies of the galaxy danced around it, they began to notice a strange, almost imperceptible distortion in the fabric of space, as if the blackhole's gravitational pull was exerting an influence on the very course of events itself.\n",
"\n",
"As the centuries passed, astronomers from across the galaxy became increasingly fascinated by the Eclipse of Eternity, pouring over ancient texts and scouring the cosmos for any hint of its secrets. One such scholar, a brilliant and reclusive astrophysicist named Dr. Elara Vex, became obsessed with unraveling the mysteries of the blackhole. She spent years pouring over ancient texts, deciphering cryptic messages and hidden codes that hinted at the existence of a long-lost civilization that had once thrived in the heart of the blackhole itself. According to legend, this ancient civilization had possessed knowledge of the cosmos that was beyond human comprehension, and had used their mastery of the universe to create the Eclipse of Eternity as a gateway to other dimensions.\n",
"\n",
"As Dr. Vex delved deeper into her research, she began to experience strange and vivid dreams, visions that seemed to transport her to the very heart of the blackhole itself. In these dreams, she saw ancient beings, their faces twisted in agony as they were consumed by the void. She saw stars and galaxies, their light warped and distorted by the blackhole's gravitational pull. And she saw the Eclipse of Eternity itself, its swirling vortex of darkness pulsing with an otherworldly energy that seemed to be calling to her. As the dreams grew more vivid and more frequent, Dr. Vex became convinced that she was being drawn into the heart of the blackhole, and that the secrets of the universe lay waiting for her on the other side."
]
}
],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"llm = ChatCerebras(\n",
" model=\"llama3.1-70b\",\n",
" # other params...\n",
")\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"human\",\n",
" \"Write a long convoluted story about {subject}. I want {num_paragraphs} paragraphs.\",\n",
" )\n",
" ]\n",
")\n",
"chain = prompt | llm\n",
"\n",
"async for chunk in chain.astream({\"num_paragraphs\": 3, \"subject\": \"blackholes\"}):\n",
" print(chunk.content, end=\"\", flush=True)"
]
},
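{
"cell_type": "markdown",
"id": "tool-calling-sketch-md",
"metadata": {},
"source": [
"## Tool calling (sketch)\n",
"\n",
"The feature table above lists tool calling as supported. Below is a minimal, hedged sketch assuming the standard LangChain `bind_tools` interface; the `get_weather` tool is a made-up example, not from the official docs."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "tool-calling-sketch-code",
"metadata": {},
"outputs": [],
"source": [
"from langchain_cerebras import ChatCerebras\n",
"from langchain_core.tools import tool\n",
"\n",
"\n",
"@tool\n",
"def get_weather(city: str) -> str:\n",
"    \"\"\"Return a canned weather string for a city.\"\"\"\n",
"    return f\"It is sunny in {city}.\"\n",
"\n",
"\n",
"llm = ChatCerebras(\n",
"    model=\"llama3.1-70b\",\n",
")\n",
"llm_with_tools = llm.bind_tools([get_weather])\n",
"\n",
"ai_msg = llm_with_tools.invoke(\"What's the weather in Paris?\")\n",
"ai_msg.tool_calls"
]
},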
{
"cell_type": "markdown",
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all ChatCerebras features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_cerebras.chat_models.ChatCerebras.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -404,6 +404,7 @@
" max_new_tokens=512,\n",
" do_sample=False,\n",
" repetition_penalty=1.03,\n",
" return_full_text=False,\n",
" ),\n",
" model_kwargs={\"quantization_config\": quantization_config},\n",
")\n",

View File

@@ -0,0 +1,297 @@
{
"cells": [
{
"cell_type": "raw",
"id": "53fbf15f",
"metadata": {
"vscode": {
"languageId": "raw"
}
},
"source": [
"---\n",
"sidebar_label: Nebula (Symbl.ai)\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "bf733a38-db84-4363-89e2-de6735c37230",
"metadata": {},
"source": [
"# Nebula (Symbl.ai)\n",
"\n",
"## Overview\n",
"This notebook covers how to get started with [Nebula](https://docs.symbl.ai/docs/nebula-llm) - Symbl.ai's chat model.\n",
"\n",
"### Integration details\n",
"Head to the [API reference](https://docs.symbl.ai/reference/nebula-chat) for detailed documentation.\n",
"\n",
"### Model features: TODO"
]
},
{
"cell_type": "markdown",
"id": "3607d67e-e56c-4102-bbba-df2edc0e109e",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"### Credentials\n",
"To get started, request a [Nebula API key](https://platform.symbl.ai/#/login) and set the `NEBULA_API_KEY` environment variable:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2108b517-1e8d-473d-92fa-4f930e8072a7",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"NEBULA_API_KEY\"] = getpass.getpass()"
]
},
{
"cell_type": "markdown",
"id": "68b44357",
"metadata": {},
"source": [
"### Installation\n",
"The integration is set up in the `langchain-community` package."
]
},
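{
"cell_type": "code",
"execution_count": null,
"id": "install-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Not part of the original notebook: a minimal install sketch for the package named above.\n",
"%pip install -qU langchain-community"
]
},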
{
"cell_type": "markdown",
"id": "4c26754b-b3c9-4d93-8f36-43049bd943bf",
"metadata": {},
"source": [
"## Instantiation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0fdd26e7",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.chat_models.symblai_nebula import ChatNebula\n",
"from langchain_core.messages import AIMessage, HumanMessage, SystemMessage"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"chat = ChatNebula(max_tokens=1024, temperature=0.5)"
]
},
{
"cell_type": "markdown",
"id": "2a915547",
"metadata": {},
"source": [
"## Invocation"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=[{'role': 'human', 'text': 'What is the capital of France?'}, {'role': 'assistant', 'text': 'The capital of France is Paris.'}])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant that answers general knowledge questions.\"\n",
" ),\n",
" HumanMessage(content=\"What is the capital of France?\"),\n",
"]\n",
"chat.invoke(messages)"
]
},
{
"cell_type": "markdown",
"id": "9723913f",
"metadata": {},
"source": [
"### Async"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=[{'role': 'human', 'text': 'What is the capital of France?'}, {'role': 'assistant', 'text': 'The capital of France is Paris.'}])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"await chat.ainvoke(messages)"
]
},
{
"cell_type": "markdown",
"id": "e0a1d3b4",
"metadata": {},
"source": [
"### Streaming"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" The capital of France is Paris."
]
}
],
"source": [
"for chunk in chat.stream(messages):\n",
" print(chunk.content, end=\"\", flush=True)"
]
},
{
"cell_type": "markdown",
"id": "9f91b7c7",
"metadata": {},
"source": [
"### Batch"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "054dc648",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[AIMessage(content=[{'role': 'human', 'text': 'What is the capital of France?'}, {'role': 'assistant', 'text': 'The capital of France is Paris.'}])]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chat.batch([messages])"
]
},
{
"cell_type": "markdown",
"id": "e59a5519",
"metadata": {},
"source": [
"## Chaining"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "6455f67b",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"prompt = ChatPromptTemplate.from_template(\"Tell me a joke about {topic}\")\n",
"chain = prompt | chat"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "deb1e2a1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=[{'role': 'human', 'text': 'Tell me a joke about cows'}, {'role': 'assistant', 'text': \"Sure, here's a joke about cows:\\n\\nWhy did the cow cross the road?\\n\\nTo get to the udder side!\"}])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke({\"topic\": \"cows\"})"
]
},
{
"cell_type": "markdown",
"id": "bb9d4755",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"Check out the [API reference](https://python.langchain.com/v0.2/api_reference/community/chat_models/langchain_community.chat_models.symblai_nebula.ChatNebula.html) for more detail."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -15,7 +15,7 @@
"source": [
"# BoxLoader\n",
"\n",
"This notebook provides a quick overview for getting started with Box [document loader](/docs/integrations/document_loaders/). For detailed documentation of all BoxLoader features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.langchain_box_loader.BoxLoader.html).\n",
"This notebook provides a quick overview for getting started with Box [document loader](/docs/integrations/document_loaders/). For detailed documentation of all BoxLoader features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.box.BoxLoader.html).\n",
"\n",
"\n",
"## Overview\n",
@@ -34,7 +34,7 @@
"\n",
"| Class | Package | Local | Serializable | JS support|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [BoxLoader](https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.langchain_boxloader.BoxLoader.html) | [langchain_box](https://python.langchain.com/v0.2/api_reference/box/index.html) | ✅ | ❌ | ❌ | \n",
"| [BoxLoader](https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.box.BoxLoader.html) | [langchain_box](https://python.langchain.com/v0.2/api_reference/box/index.html) | ✅ | ❌ | ❌ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Async Support\n",
"| :---: | :---: | :---: | \n",
@@ -244,7 +244,7 @@
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all BoxLoader features and configurations head to the API reference: https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.langchain_box_loader.BoxLoader.html\n",
"For detailed documentation of all BoxLoader features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.box.BoxLoader.html)\n",
"\n",
"\n",
"## Help\n",

View File

@@ -105,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "79d3e549",
"metadata": {},
"outputs": [],
@@ -131,7 +131,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"id": "8da59ef8",
"metadata": {},
"outputs": [
@@ -139,17 +139,16 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO: NumExpr defaulting to 12 threads.\n",
"INFO: pikepdf C++ to Python logger bridge initialized\n"
]
},
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
]
},
"execution_count": 2,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -162,7 +161,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"id": "97f7aa1f",
"metadata": {},
"outputs": [
@@ -170,7 +169,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}\n"
"{'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}\n"
]
}
],
@@ -188,17 +187,17 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "b05604d2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
]
},
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -279,7 +278,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"id": "386eb63c",
"metadata": {},
"outputs": [
@@ -299,7 +298,7 @@
"Document(metadata={'source': 'example_data/fake.docx', 'category_depth': 0, 'filename': 'fake.docx', 'languages': ['por', 'cat'], 'filetype': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'category': 'Title', 'element_id': '56d531394823d81787d77a04462ed096'}, page_content='Lorem ipsum dolor sit amet.')"
]
},
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -327,7 +326,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"id": "a3d7c846",
"metadata": {},
"outputs": [
@@ -375,16 +374,22 @@
"### Unstructured SDK Client\n",
"\n",
"Partitioning with the Unstructured API relies on the [Unstructured SDK\n",
"Client](https://docs.unstructured.io/api-reference/api-services/sdk).\n",
"Client](https://docs.unstructured.io/api-reference/api-services/accessing-unstructured-api).\n",
"\n",
"Below is an example showing how you can customize some features of the client and use your own `requests.Session()`, pass in an alternative `server_url`, or customize the `RetryConfig` object for more control over how failed requests are handled.\n",
"\n",
"Note that the example below may not use the latest version of the UnstructuredClient and there could be breaking changes in future releases. For the latest examples, refer to the [Unstructured Python SDK](https://docs.unstructured.io/api-reference/api-services/sdk-python) docs."
"If you want to customize the client, you will have to pass an `UnstructuredClient` instance to the `UnstructuredLoader`. Below is an example showing how you can customize features of the client such as using your own `requests.Session()`, passing an alternative `server_url`, and customizing the `RetryConfig` object. For more information about customizing the client or what additional parameters the sdk client accepts, refer to the [Unstructured Python SDK](https://docs.unstructured.io/api-reference/api-services/sdk-python) docs and the client section of the [API Parameters](https://docs.unstructured.io/api-reference/api-services/api-parameters) docs. Note that all API Parameters should be passed to the `UnstructuredLoader`."
]
},
{
"cell_type": "markdown",
"id": "ebb69c85",
"metadata": {},
"source": [
"<div class=\"alert alert-block alert-warning\"><b>Warning:</b> The example below may not use the latest version of the UnstructuredClient and there could be breaking changes in future releases. For the latest examples, refer to the <a href=\"https://docs.unstructured.io/api-reference/api-services/sdk-python\">Unstructured Python SDK</a> docs.</div>"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"id": "58e55264",
"metadata": {},
"outputs": [
@@ -394,13 +399,15 @@
"text": [
"INFO: Preparing to split document for partition.\n",
"INFO: Concurrency level set to 5\n",
"INFO: Splitting pages 1 to 16 (16 total)\n",
"INFO: Determined optimal split size of 4 pages.\n",
"INFO: Partitioning 4 files with 4 page(s) each.\n",
"INFO: Partitioning set #1 (pages 1-4).\n",
"INFO: Partitioning set #2 (pages 5-8).\n",
"INFO: Partitioning set #3 (pages 9-12).\n",
"INFO: Partitioning set #4 (pages 13-16).\n",
"INFO: Splitting pages 1 to 10 (10 total)\n",
"INFO: Determined optimal split size of 2 pages.\n",
"INFO: Partitioning 5 files with 2 page(s) each.\n",
"INFO: Partitioning set #1 (pages 1-2).\n",
"INFO: Partitioning set #2 (pages 3-4).\n",
"INFO: Partitioning set #3 (pages 5-6).\n",
"INFO: Partitioning set #4 (pages 7-8).\n",
"INFO: Partitioning set #5 (pages 9-10).\n",
"INFO: HTTP Request: POST https://api.unstructuredapp.io/general/v0/general \"HTTP/1.1 200 OK\"\n",
"INFO: HTTP Request: POST https://api.unstructuredapp.io/general/v0/general \"HTTP/1.1 200 OK\"\n",
"INFO: HTTP Request: POST https://api.unstructuredapp.io/general/v0/general \"HTTP/1.1 200 OK\"\n",
"INFO: HTTP Request: POST https://api.unstructuredapp.io/general/v0/general \"HTTP/1.1 200 OK\"\n",
@@ -408,6 +415,7 @@
"INFO: Successfully partitioned set #2, elements added to the final result.\n",
"INFO: Successfully partitioned set #3, elements added to the final result.\n",
"INFO: Successfully partitioned set #4, elements added to the final result.\n",
"INFO: Successfully partitioned set #5, elements added to the final result.\n",
"INFO: Successfully partitioned the document.\n"
]
},
@@ -429,8 +437,8 @@
" api_key_auth=os.getenv(\n",
" \"UNSTRUCTURED_API_KEY\"\n",
" ), # Note: the client API param is \"api_key_auth\" instead of \"api_key\"\n",
" client=requests.Session(),\n",
" server_url=\"https://api.unstructuredapp.io/general/v0/general\",\n",
" client=requests.Session(), # Define your own requests session\n",
" server_url=\"https://api.unstructuredapp.io/general/v0/general\", # Define your own api url\n",
" retry_config=RetryConfig(\n",
" strategy=\"backoff\",\n",
" retry_connection_errors=True,\n",
@@ -440,13 +448,15 @@
" exponent=1.5,\n",
" max_elapsed_time=900000,\n",
" ),\n",
" ),\n",
" ), # Define your own retry config\n",
")\n",
"\n",
"loader = UnstructuredLoader(\n",
" \"./example_data/layout-parser-paper.pdf\",\n",
" partition_via_api=True,\n",
" client=client,\n",
" split_pdf_page=True,\n",
" split_pdf_page_range=[1, 10],\n",
")\n",
"\n",
"docs = loader.load()\n",
@@ -479,17 +489,10 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"id": "e9f1c20d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING: Partitioning locally even though api_key is defined since partition_via_api=False.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
@@ -542,7 +545,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.10.13"
}
},
"nbformat": 4,

View File

@@ -210,6 +210,13 @@
")\n",
"llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This same `HuggingFaceEndpoint` class can be used with a local [HuggingFace TGI instance](https://github.com/huggingface/text-generation-inference/blob/main/docs/source/index.md) serving the LLM. Check out the TGI [repository](https://github.com/huggingface/text-generation-inference/tree/main) for details on various hardware (GPU, TPU, Gaudi...) support."
]
}
],
"metadata": {

View File

@@ -6,16 +6,272 @@
"source": [
"# IPEX-LLM\n",
"\n",
"> [IPEX-LLM](https://github.com/intel-analytics/ipex-llm/) is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency. \n",
"> [IPEX-LLM](https://github.com/intel-analytics/ipex-llm) is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency.\n",
"\n",
"This example goes over how to use LangChain to interact with `ipex-llm` for text generation. \n"
"- [IPEX-LLM on Intel GPU](#ipex-llm-on-intel-gpu)\n",
"- [IPEX-LLM on Intel CPU](#ipex-llm-on-intel-cpu)\n",
"\n",
"## IPEX-LLM on Intel GPU\n",
"\n",
"This example goes over how to use LangChain to interact with `ipex-llm` for text generation on Intel GPU. \n",
"\n",
"> **Note**\n",
">\n",
"> It is recommended that only Windows users with Intel Arc A-Series GPU (except for Intel Arc A300-Series or Pro A60) run Jupyter notebook directly for section \"IPEX-LLM on Intel GPU\". For other cases (e.g. Linux users, Intel iGPU, etc.), it is recommended to run the code with Python scripts in terminal for best experiences.\n",
"\n",
"### Install Prerequisites\n",
"To benefit from IPEX-LLM on Intel GPUs, there are several prerequisite steps for tools installation and environment preparation.\n",
"\n",
"If you are a Windows user, visit the [Install IPEX-LLM on Windows with Intel GPU Guide](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_windows_gpu.md), and follow [Install Prerequisites](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_windows_gpu.md#install-prerequisites) to update GPU driver (optional) and install Conda.\n",
"\n",
"If you are a Linux user, visit the [Install IPEX-LLM on Linux with Intel GPU](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md), and follow [**Install Prerequisites**](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md#install-prerequisites) to install GPU driver, Intel® oneAPI Base Toolkit 2024.0, and Conda.\n",
"\n",
"### Setup\n",
"\n",
"After the prerequisites installation, you should have created a conda environment with all prerequisites installed. **Start the jupyter service in this conda environment**:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain langchain-community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
"Install IEPX-LLM for running LLMs locally on Intel GPU."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> **Note**\n",
">\n",
"> You can also use `https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/` as the extra-indel-url.\n",
"\n",
"### Runtime Configuration\n",
"\n",
"For optimal performance, it is recommended to set several environment variables based on your device:\n",
"\n",
"#### For Windows Users with Intel Core Ultra integrated GPU"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"SYCL_CACHE_PERSISTENT\"] = \"1\"\n",
"os.environ[\"BIGDL_LLM_XMX_DISABLED\"] = \"1\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### For Windows Users with Intel Arc A-Series GPU"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"SYCL_CACHE_PERSISTENT\"] = \"1\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> **Note**\n",
">\n",
"> For the first time that each model runs on Intel iGPU/Intel Arc A300-Series or Pro A60, it may take several minutes to compile.\n",
">\n",
"> For other GPU type, please refer to [here](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Overview/install_gpu.md#runtime-configuration) for Windows users, and [here](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Overview/install_gpu.md#runtime-configuration-1) for Linux users.\n",
"\n",
"\n",
"### Basic Usage\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"\n",
"from langchain.chains import LLMChain\n",
"from langchain_community.llms import IpexLLM\n",
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"warnings.filterwarnings(\"ignore\", category=UserWarning, message=\".*padding_mask.*\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specify the prompt template for your model. In this example, we use the [vicuna-1.5](https://huggingface.co/lmsys/vicuna-7b-v1.5) model. If you're working with a different model, choose a proper template accordingly."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"template = \"USER: {question}\\nASSISTANT:\"\n",
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the model locally using IpexLLM using `IpexLLM.from_model_id`. It will load the model directly in its Huggingface format and convert it automatically to low-bit format for inference. Set `device` to `\"xpu\"` in `model_kwargs` when initializing IpexLLM in order to load the LLM model to Intel GPU."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm = IpexLLM.from_model_id(\n",
" model_id=\"lmsys/vicuna-7b-v1.5\",\n",
" model_kwargs={\n",
" \"temperature\": 0,\n",
" \"max_length\": 64,\n",
" \"trust_remote_code\": True,\n",
" \"device\": \"xpu\",\n",
" },\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use it in Chains"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm_chain = prompt | llm\n",
"\n",
"question = \"What is AI?\"\n",
"output = llm_chain.invoke(question)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save/Load Low-bit Model\n",
"Alternatively, you might save the low-bit model to disk once and use `from_model_id_low_bit` instead of `from_model_id` to reload it for later use - even across different machines. It is space-efficient, as the low-bit model demands significantly less disk space than the original model. And `from_model_id_low_bit` is also more efficient than `from_model_id` in terms of speed and memory usage, as it skips the model conversion step. You can similarly set `device` to `\"xpu\"` in `model_kwargs` in order to load the LLM model to Intel GPU. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To save the low-bit model, use `save_low_bit` as follows."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"saved_lowbit_model_path = \"./vicuna-7b-1.5-low-bit\" # path to save low-bit model\n",
"llm.model.save_low_bit(saved_lowbit_model_path)\n",
"del llm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the model from saved lowbit model path as follows. \n",
"> Note that the saved path for the low-bit model only includes the model itself but not the tokenizers. If you wish to have everything in one place, you will need to manually download or copy the tokenizer files from the original model's directory to the location where the low-bit model is saved."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm_lowbit = IpexLLM.from_model_id_low_bit(\n",
" model_id=saved_lowbit_model_path,\n",
" tokenizer_id=\"lmsys/vicuna-7b-v1.5\",\n",
" # tokenizer_name=saved_lowbit_model_path, # copy the tokenizers to saved path if you want to use it this way\n",
" model_kwargs={\n",
" \"temperature\": 0,\n",
" \"max_length\": 64,\n",
" \"trust_remote_code\": True,\n",
" \"device\": \"xpu\",\n",
" },\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use the loaded model in Chains:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm_chain = prompt | llm_lowbit\n",
"\n",
"\n",
"question = \"What is AI?\"\n",
"output = llm_chain.invoke(question)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## IPEX-LLM on Intel CPU\n",
"\n",
"This example goes over how to use LangChain to interact with `ipex-llm` for text generation on Intel CPU.\n",
"\n",
"### Setup"
]
},
{
@@ -33,7 +289,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Install IEPX-LLM for running LLMs locally on Intel CPU."
"Install IEPX-LLM for running LLMs locally on Intel CPU:\n",
"\n",
"#### For Windows users:"
]
},
{
@@ -49,7 +307,23 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Basic Usage"
"#### For Linux users:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Basic Usage"
]
},
{
@@ -126,15 +400,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Save/Load Low-bit Model\n",
"Alternatively, you might save the low-bit model to disk once and use `from_model_id_low_bit` instead of `from_model_id` to reload it for later use - even across different machines. It is space-efficient, as the low-bit model demands significantly less disk space than the original model. And `from_model_id_low_bit` is also more efficient than `from_model_id` in terms of speed and memory usage, as it skips the model conversion step."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To save the low-bit model, use `save_low_bit` as follows."
"### Save/Load Low-bit Model\n",
"\n",
"Alternatively, you might save the low-bit model to disk once and use `from_model_id_low_bit` instead of `from_model_id` to reload it for later use - even across different machines. It is space-efficient, as the low-bit model demands significantly less disk space than the original model. And `from_model_id_low_bit` is also more efficient than `from_model_id` in terms of speed and memory usage, as it skips the model conversion step.\n",
"\n",
"To save the low-bit model, use `save_low_bit` as follows:"
]
},
{
@@ -152,7 +422,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the model from saved lowbit model path as follows. \n",
"Load the model from saved lowbit model path as follows.\n",
"\n",
"> Note that the saved path for the low-bit model only includes the model itself but not the tokenizers. If you wish to have everything in one place, you will need to manually download or copy the tokenizer files from the original model's directory to the location where the low-bit model is saved."
]
},
@@ -192,22 +463,8 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"name": "python"
}
},
"nbformat": 4,

View File

@@ -42,18 +42,10 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "efcdb2b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Enter your OpenAI API key: ········\n"
]
}
],
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
@@ -72,7 +64,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "52fa46e8",
"metadata": {},
"outputs": [],
@@ -122,7 +114,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "6fb585dd",
"metadata": {
"tags": []
@@ -144,17 +136,17 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"id": "85b49da0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"\\n\\nI'm an AI language model created by OpenAI, so I don't have feelings or emotions. But thank you for asking! How can I assist you today?\""
"'\\n\\nI am an AI and do not have emotions like humans do, so I am always functioning at my optimal level. Thank you for asking! How can I assist you today?'"
]
},
"execution_count": 5,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -173,16 +165,27 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "a641dbd9",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'\\nIch liebe Programmieren.'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"prompt = PromptTemplate(\"How to say {input} in {output_language}:\\n\")\n",
"prompt = PromptTemplate.from_template(\"How to say {input} in {output_language}:\\n\")\n",
"\n",
"chain = prompt | llm\n",
"chain.invoke(\n",
@@ -205,7 +208,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "55142cec",
"metadata": {},
"outputs": [],
@@ -221,8 +224,8 @@
]
},
{
"cell_type": "markdown",
"id": "73e207dd",
"cell_type": "raw",
"id": "2fd99e97-013f-4c28-bb47-426faa42a2cf",
"metadata": {},
"source": [
"## API reference\n",
@@ -247,7 +250,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.4"
},
"vscode": {
"interpreter": {

View File

@@ -70,6 +70,10 @@
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"e2e_nlg\",\n",
" adapter_version=1,\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -87,6 +91,10 @@
" predibase_api_key=os.environ.get(\"PREDIBASE_API_TOKEN\"),\n",
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"predibase/e2e_nlg\",\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -96,7 +104,11 @@
"metadata": {},
"outputs": [],
"source": [
"response = model.invoke(\"Can you recommend me a nice dry wine?\")\n",
"# Optionally use `kwargs` to dynamically overwrite \"generate()\" settings.\n",
"response = model.invoke(\n",
" \"Can you recommend me a nice dry wine?\",\n",
" **{\"temperature\": 0.5, \"max_new_tokens\": 1024},\n",
")\n",
"print(response)"
]
},
@@ -127,6 +139,10 @@
" model=\"mistral-7b\",\n",
" predibase_api_key=os.environ.get(\"PREDIBASE_API_TOKEN\"),\n",
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -147,6 +163,10 @@
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"e2e_nlg\",\n",
" adapter_version=1,\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -162,6 +182,10 @@
" predibase_api_key=os.environ.get(\"PREDIBASE_API_TOKEN\"),\n",
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"predibase/e2e_nlg\",\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")"
]
},
@@ -259,6 +283,10 @@
" predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)\n",
" adapter_id=\"my-finetuned-adapter-id\", # Supports both, Predibase-hosted and HuggingFace-hosted adapter repositories.\n",
" adapter_version=1, # required for Predibase-hosted adapters (ignored for HuggingFace-hosted adapters)\n",
" **{\n",
" \"api_token\": os.environ.get(\"HUGGING_FACE_HUB_TOKEN\"),\n",
" \"max_new_tokens\": 5, # default is 256\n",
" },\n",
")\n",
"# replace my-base-LLM with the name of your choice of a serverless base model in Predibase"
]
@@ -269,7 +297,8 @@
"metadata": {},
"outputs": [],
"source": [
"# response = model.invoke(\"Can you help categorize the following emails into positive, negative, and neutral?\")"
"# Optionally use `kwargs` to dynamically overwrite \"generate()\" settings.\n",
"# response = model.invoke(\"Can you help categorize the following emails into positive, negative, and neutral?\", **{\"temperature\": 0.5, \"max_new_tokens\": 1024})"
]
}
],

View File

@@ -26,6 +26,7 @@ from langchain_anthropic import ChatAnthropic
model = ChatAnthropic(model='claude-3-opus-20240229')
```
## LLMs
### [Legacy] AnthropicLLM

View File

@@ -204,7 +204,7 @@ AWS offers services for computing, databases, storage, analytics, and other func
See a [usage example](/docs/integrations/vectorstores/documentdb).
```python
from langchain.vectorstores import DocumentDBVectorSearch
from langchain_community.vectorstores import DocumentDBVectorSearch
```
### Amazon MemoryDB
[Amazon MemoryDB](https://aws.amazon.com/memorydb/) is a durable, in-memory database service that delivers ultra-fast performance. MemoryDB is compatible with Redis OSS, a popular open source data store,
@@ -305,7 +305,7 @@ pip install boto3
See a [usage example](/docs/integrations/memory/aws_dynamodb).
```python
from langchain.memory import DynamoDBChatMessageHistory
from langchain_community.chat_message_histories import DynamoDBChatMessageHistory
```
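A minimal sketch, assuming a DynamoDB table (here called `SessionTable`, an illustrative name) with a `SessionId` partition key already exists and AWS credentials are configured:

```python
from langchain_community.chat_message_histories import DynamoDBChatMessageHistory

# Table name and session id below are illustrative values.
history = DynamoDBChatMessageHistory(table_name="SessionTable", session_id="user-123")
history.add_user_message("Hello!")
history.add_ai_message("Hi, how can I help you?")
print(history.messages)
```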
## Graphs
@@ -333,6 +333,12 @@ from langchain_community.chains.graph_qa.neptune_sparql import NeptuneSparqlQACh
## Callbacks
### Bedrock token usage
```python
from langchain_community.callbacks.bedrock_anthropic_callback import BedrockAnthropicTokenUsageCallbackHandler
```
### SageMaker Tracking
>[Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a fully managed service that is used to quickly
@@ -351,7 +357,7 @@ pip install google-search-results sagemaker
See a [usage example](/docs/integrations/callbacks/sagemaker_tracking).
```python
from langchain.callbacks import SageMakerCallbackHandler
from langchain_community.callbacks import SageMakerCallbackHandler
```
## Chains

View File

@@ -58,7 +58,7 @@ The value of image_url can be any of the following:
### Vertex AI
Access PaLM chat models like `chat-bison` and `codechat-bison` via Google Cloud.
Access chat models like `Gemini` via Google Cloud.
We need to install `langchain-google-vertexai` python package.
@@ -72,6 +72,122 @@ See a [usage example](/docs/integrations/chat/google_vertex_ai_palm).
from langchain_google_vertexai import ChatVertexAI
```
### Chat Anthropic on Vertex AI Model Garden
See a [usage example](/docs/integrations/llms/google_vertex_ai_palm).
```python
from langchain_google_vertexai.model_garden import ChatAnthropicVertex
```
### Chat Llama on Vertex AI Model Garden
```python
from langchain_google_vertexai.model_garden_maas.llama import VertexModelGardenLlama
```
### Chat Mistral on Vertex AI Model Garden
```python
from langchain_google_vertexai.model_garden_maas.mistral import VertexModelGardenMistral
```
### Chat Gemma local from Hugging Face
>Local `Gemma` model loaded from `HuggingFace`.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaChatLocalHF
```
### Chat Gemma local from Kaggle
>Local `Gemma` model loaded from `Kaggle`.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaChatLocalKaggle
```
### Chat Gemma on Vertex AI Model Garden
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaChatVertexAIModelGarden
```
### Vertex AI image captioning chat
>Implementation of the `Image Captioning model` as a chat.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIImageCaptioningChat
```
### Vertex AI image editor chat
>Given an image and a prompt, edit the image. Currently only supports mask-free editing.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIImageEditorChat
```
### Vertex AI image generator chat
>Generates an image from a prompt.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIImageGeneratorChat
```
### Vertex AI visual QnA chat
>Chat implementation of a visual QnA model.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIVisualQnAChat
```
## LLMs
### Google Generative AI
@@ -106,9 +222,63 @@ See a [usage example](/docs/integrations/llms/google_vertex_ai_palm#vertex-model
from langchain_google_vertexai import VertexAIModelGarden
```
### Gemma local from Hugging Face
>Local `Gemma` model loaded from `HuggingFace`.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaLocalHF
```
### Gemma local from Kaggle
>Local `Gemma` model loaded from `Kaggle`.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaLocalKaggle
```
### Gemma on Vertex AI Model Garden
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.gemma import GemmaVertexAIModelGarden
```
### Vertex AI image captioning
>Implementation of the `Image Captioning model` as an LLM.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.vision_models import VertexAIImageCaptioning
```
## Embedding models
### Google Generative AI Embeddings
### Google Generative AI embedding
See a [usage example](/docs/integrations/text_embedding/google_generative_ai).
@@ -126,6 +296,18 @@ export GOOGLE_API_KEY=your-api-key
from langchain_google_genai import GoogleGenerativeAIEmbeddings
```
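A minimal sketch, assuming `GOOGLE_API_KEY` is set (the `models/embedding-001` model name is an assumption; use any embedding model available to your key):

```python
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector = embeddings.embed_query("hello, world!")
print(len(vector))  # dimensionality of the returned embedding
```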
### Google Generative AI server-side embedding
Install the python package:
```bash
pip install langchain-google-genai
```
```python
from langchain_google_genai.google_vector_store import ServerSideEmbedding
```
### Vertex AI
We need to install `langchain-google-vertexai` python package.
@@ -140,7 +322,7 @@ See a [usage example](/docs/integrations/text_embedding/google_vertex_ai_palm).
from langchain_google_vertexai import VertexAIEmbeddings
```
### Palm Embedding
### Palm embedding
We need to install `langchain-community` python package.
@@ -189,6 +371,7 @@ from langchain_google_community import BigQueryLoader
### Bigtable
> [Google Cloud Bigtable](https://cloud.google.com/bigtable/docs) is Google's fully managed NoSQL Big Data database service in Google Cloud.
Install the python package:
```bash
@@ -204,6 +387,7 @@ from langchain_google_bigtable import BigtableLoader
### Cloud SQL for MySQL
> [Google Cloud SQL for MySQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your MySQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -213,12 +397,13 @@ pip install langchain-google-cloud-sql-mysql
See [usage example](/docs/integrations/document_loaders/google_cloud_sql_mysql).
```python
from langchain_google_cloud_sql_mysql import MySQLEngine, MySQLDocumentLoader
from langchain_google_cloud_sql_mysql import MySQLEngine, MySQLLoader
```
### Cloud SQL for SQL Server
> [Google Cloud SQL for SQL Server](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your SQL Server databases on Google Cloud.
Install the python package:
```bash
@@ -234,6 +419,7 @@ from langchain_google_cloud_sql_mssql import MSSQLEngine, MSSQLLoader
### Cloud SQL for PostgreSQL
> [Google Cloud SQL for PostgreSQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your PostgreSQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -318,6 +504,7 @@ from langchain_google_community import GoogleDriveLoader
### Firestore (Native Mode)
> [Google Cloud Firestore](https://cloud.google.com/firestore/docs/) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
Install the python package:
```bash
@@ -334,6 +521,7 @@ from langchain_google_firestore import FirestoreLoader
> [Google Cloud Firestore in Datastore mode](https://cloud.google.com/datastore/docs) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
> Firestore is the newest version of Datastore and introduces several improvements over Datastore.
Install the python package:
```bash
@@ -349,6 +537,7 @@ from langchain_google_datastore import DatastoreLoader
### Memorystore for Redis
> [Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) is a fully managed Redis service for Google Cloud. Applications running on Google Cloud can achieve extreme performance by leveraging the highly scalable, available, secure Redis service without the burden of managing complex Redis deployments.
Install the python package:
```bash
@@ -358,12 +547,13 @@ pip install langchain-google-memorystore-redis
See [usage example](/docs/integrations/document_loaders/google_memorystore_redis).
```python
from langchain_google_memorystore_redis import MemorystoreLoader
from langchain_google_memorystore_redis import MemorystoreDocumentLoader
```
### Spanner
> [Google Cloud Spanner](https://cloud.google.com/spanner/docs) is a fully managed, mission-critical, relational database service on Google Cloud that offers transactional consistency at global scale, automatic, synchronous replication for high availability, and support for two SQL dialects: GoogleSQL (ANSI 2011 with extensions) and PostgreSQL.
Install the python package:
```bash
@@ -482,6 +672,7 @@ from langchain.vectorstores import BigQueryVectorSearch
### Memorystore for Redis
> [Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) is a fully managed Redis service for Google Cloud. Applications running on Google Cloud can achieve extreme performance by leveraging the highly scalable, available, secure Redis service without the burden of managing complex Redis deployments.
Install the python package:
```bash
@@ -497,6 +688,7 @@ from langchain_google_memorystore_redis import RedisVectorStore
### Spanner
> [Google Cloud Spanner](https://cloud.google.com/spanner/docs) is a fully managed, mission-critical, relational database service on Google Cloud that offers transactional consistency at global scale, automatic, synchronous replication for high availability, and support for two SQL dialects: GoogleSQL (ANSI 2011 with extensions) and PostgreSQL.
Install the python package:
```bash
@@ -512,6 +704,7 @@ from langchain_google_spanner import SpannerVectorStore
### Firestore (Native Mode)
> [Google Cloud Firestore](https://cloud.google.com/firestore/docs/) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
Install the python package:
```bash
@@ -521,12 +714,13 @@ pip install langchain-google-firestore
See [usage example](/docs/integrations/vectorstores/google_firestore).
```python
from langchain_google_firestore import FirestoreVectorstore
from langchain_google_firestore import FirestoreVectorStore
```
### Cloud SQL for MySQL
> [Google Cloud SQL for MySQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your MySQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -542,6 +736,7 @@ from langchain_google_cloud_sql_mysql import MySQLEngine, MySQLVectorStore
### Cloud SQL for PostgreSQL
> [Google Cloud SQL for PostgreSQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your PostgreSQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -573,6 +768,52 @@ See a [usage example](/docs/integrations/vectorstores/google_vertex_ai_vector_se
from langchain_google_vertexai import VectorSearchVectorStore
```
### Vertex AI Vector Search with DataStore
> VectorSearch with DataStore document storage.
Install the python package:
```bash
pip install langchain-google-vertexai
```
See a [usage example](/docs/integrations/vectorstores/google_vertex_ai_vector_search/#optional--you-can-also-create-vectore-and-store-chunks-in-a-datastore).
```python
from langchain_google_vertexai import VectorSearchVectorStoreDatastore
```
### VectorSearchVectorStoreGCS
> Alias of `VectorSearchVectorStore` for consistency
> with the rest of the vector stores that use different document storage backends.
Install the python package:
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai import VectorSearchVectorStoreGCS
```
### Google Generative AI Vector Store
> Currently, it computes the embedding vectors on the server side.
> For more information visit [Guide](https://developers.generativeai.google/guide).
Install the python package:
```bash
pip install langchain-google-genai
```
```python
from langchain_google_genai.google_vector_store import GoogleVectorStore
```
### ScaNN
>[Google ScaNN](https://github.com/google-research/google-research/tree/master/scann)
@@ -605,7 +846,7 @@ from langchain_community.vectorstores import ScaNN
We need to install several python packages.
```bash
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib langchain-googledrive
```
See a [usage example and authorization instructions](/docs/integrations/retrievers/google_drive).
@@ -619,16 +860,38 @@ from langchain_googledrive.retrievers import GoogleDriveRetriever
> [Vertex AI Search](https://cloud.google.com/generative-ai-app-builder/docs/introduction)
> from Google Cloud allows developers to quickly build generative AI powered search engines for customers and employees.
See a [usage example](/docs/integrations/retrievers/google_vertex_ai_search).
Note: `GoogleVertexAISearchRetriever` is deprecated, use `VertexAIMultiTurnSearchRetriever`,
`VertexAISearchSummaryTool`, and `VertexAISearchRetriever` (see below).
#### GoogleVertexAISearchRetriever
We need to install the `google-cloud-discoveryengine` python package.
```bash
pip install google-cloud-discoveryengine
```
See a [usage example](/docs/integrations/retrievers/google_vertex_ai_search).
```python
from langchain_community.retrievers import GoogleVertexAISearchRetriever
```
#### VertexAIMultiTurnSearchRetriever
```python
from langchain.retrievers import GoogleVertexAISearchRetriever
from langchain_google_community import VertexAIMultiTurnSearchRetriever
```
#### VertexAISearchRetriever
```python
from langchain_google_community import VertexAISearchRetriever
```
#### VertexAISearchSummaryTool
```python
from langchain_google_community import VertexAISearchSummaryTool
```
### Document AI Warehouse
@@ -662,10 +925,10 @@ from langchain_google_community.documentai_warehouse import DocumentAIWarehouseR
> It applies DeepMind's groundbreaking research in WaveNet and Google's powerful neural networks
> to deliver the highest fidelity possible.
We need to install a python package.
We need to install python packages.
```bash
pip install google-cloud-text-to-speech
pip install google-cloud-text-to-speech langchain-google-community
```
See a [usage example and authorization instructions](/docs/integrations/tools/google_cloud_texttospeech).
@@ -680,13 +943,14 @@ We need to install several python packages.
```bash
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
pip install langchain-googledrive
```
See a [usage example and authorization instructions](/docs/integrations/tools/google_drive).
```python
from langchain_community.utilities.google_drive import GoogleDriveAPIWrapper
from langchain_community.tools.google_drive.tool import GoogleDriveSearchTool
from langchain_googledrive.utilities.google_drive import GoogleDriveAPIWrapper
from langchain_googledrive.tools.google_drive.tool import GoogleDriveSearchTool
```
### Google Finance
@@ -776,6 +1040,23 @@ from langchain.agents import load_tools
tools = load_tools(["google-search"])
```
#### GoogleSearchResults
Tool that queries the `Google Search` API (via `GoogleSearchAPIWrapper`) and gets back JSON.
```python
from langchain_community.tools import GoogleSearchResults
```
#### GoogleSearchRun
Tool that queries the `Google Search` API (via `GoogleSearchAPIWrapper`).
```python
from langchain_community.tools import GoogleSearchRun
```
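A minimal sketch, assuming `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` are set for the underlying `GoogleSearchAPIWrapper`:

```python
from langchain_community.tools import GoogleSearchResults
from langchain_community.utilities import GoogleSearchAPIWrapper

# The wrapper reads GOOGLE_API_KEY and GOOGLE_CSE_ID from the environment.
api_wrapper = GoogleSearchAPIWrapper()
tool = GoogleSearchResults(api_wrapper=api_wrapper)
print(tool.invoke("What is LangChain?"))
```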
### Google Trends
We need to install a python package.
@@ -810,6 +1091,18 @@ See a [usage example and authorization instructions](/docs/integrations/tools/gm
from langchain_google_community import GmailToolkit
```
#### GMail individual tools
You can use individual tools from the GMail Toolkit.
```python
from langchain_google_community.gmail.create_draft import GmailCreateDraft
from langchain_google_community.gmail.get_message import GmailGetMessage
from langchain_google_community.gmail.get_thread import GmailGetThread
from langchain_google_community.gmail.search import GmailSearch
from langchain_google_community.gmail.send_message import GmailSendMessage
```
## Memory
### AlloyDB for PostgreSQL
@@ -831,6 +1124,7 @@ from langchain_google_alloydb_pg import AlloyDBEngine, AlloyDBChatMessageHistory
### Cloud SQL for PostgreSQL
> [Cloud SQL for PostgreSQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your PostgreSQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -847,6 +1141,7 @@ from langchain_google_cloud_sql_pg import PostgresEngine, PostgresChatMessageHis
### Cloud SQL for MySQL
> [Cloud SQL for MySQL](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your MySQL relational databases on Google Cloud.
Install the python package:
```bash
@@ -862,6 +1157,7 @@ from langchain_google_cloud_sql_mysql import MySQLEngine, MySQLChatMessageHistor
### Cloud SQL for SQL Server
> [Cloud SQL for SQL Server](https://cloud.google.com/sql) is a fully-managed database service that helps you set up, maintain, manage, and administer your SQL Server databases on Google Cloud.
Install the python package:
```bash
@@ -877,6 +1173,7 @@ from langchain_google_cloud_sql_mssql import MSSQLEngine, MSSQLChatMessageHistor
### Spanner
> [Google Cloud Spanner](https://cloud.google.com/spanner/docs) is a fully managed, mission-critical, relational database service on Google Cloud that offers transactional consistency at global scale, automatic, synchronous replication for high availability, and support for two SQL dialects: GoogleSQL (ANSI 2011 with extensions) and PostgreSQL.
Install the python package:
```bash
@@ -892,6 +1189,7 @@ from langchain_google_spanner import SpannerChatMessageHistory
### Memorystore for Redis
> [Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) is a fully managed Redis service for Google Cloud. Applications running on Google Cloud can achieve extreme performance by leveraging the highly scalable, available, secure Redis service without the burden of managing complex Redis deployments.
Install the python package:
```bash
@@ -907,6 +1205,7 @@ from langchain_google_memorystore_redis import MemorystoreChatMessageHistory
### Bigtable
> [Google Cloud Bigtable](https://cloud.google.com/bigtable/docs) is Google's fully managed NoSQL Big Data database service in Google Cloud.
Install the python package:
```bash
@@ -922,6 +1221,7 @@ from langchain_google_bigtable import BigtableChatMessageHistory
### Firestore (Native Mode)
> [Google Cloud Firestore](https://cloud.google.com/firestore/docs/) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
Install the python package:
```bash
@@ -938,6 +1238,7 @@ from langchain_google_firestore import FirestoreChatMessageHistory
> [Google Cloud Firestore in Datastore mode](https://cloud.google.com/datastore/docs) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
> Firestore is the newest version of Datastore and introduces several improvements over Datastore.
Install the python package:
```bash
@@ -966,6 +1267,22 @@ See [usage example](/docs/integrations/memory/google_el_carro).
from langchain_google_el_carro import ElCarroChatMessageHistory
```
## Callbacks
### Vertex AI callback handler
>Callback Handler that tracks `VertexAI` info.
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
```python
from langchain_google_vertexai.callbacks import VertexAICallbackHandler
```
## Chat Loaders
### GMail
@@ -985,6 +1302,30 @@ See a [usage example and authorization instructions](/docs/integrations/chat_loa
from langchain_google_community import GMailLoader
```
## Evaluators
We need to install `langchain-google-vertexai` python package.
```bash
pip install langchain-google-vertexai
```
### VertexPairWiseStringEvaluator
>Pair-wise evaluation of the perplexity of a predicted string.
```python
from langchain_google_vertexai.evaluators.evaluation import VertexPairWiseStringEvaluator
```
### VertexStringEvaluator
>Evaluate the perplexity of a predicted string.
```python
from langchain_google_vertexai.evaluators.evaluation import VertexStringEvaluator
```
## 3rd Party Integrations
### SearchApi

View File

@@ -32,5 +32,5 @@ from langchain_community.document_loaders import ArxivLoader
See a [usage example](/docs/integrations/retrievers/arxiv).
```python
from langchain.retrievers import ArxivRetriever
from langchain_community.retrievers import ArxivRetriever
```

View File

@@ -24,6 +24,7 @@ from langchain_community.llms import QianfanLLMEndpoint
### Qianfan Chat Endpoint
See a [usage example](/docs/integrations/chat/baidu_qianfan_endpoint).
See another [usage example](/docs/integrations/chat/ernie).
```python
from langchain_community.chat_models import QianfanChatEndpoint
@@ -34,11 +35,26 @@ from langchain_community.chat_models import QianfanChatEndpoint
### Baidu Qianfan
See a [usage example](/docs/integrations/text_embedding/baidu_qianfan_endpoint).
See another [usage example](/docs/integrations/text_embedding/ernie).
```python
from langchain_community.embeddings import QianfanEmbeddingsEndpoint
```
## Document loaders
### Baidu BOS Directory Loader
```python
from langchain_community.document_loaders.baiducloud_bos_directory import BaiduBOSDirectoryLoader
```
### Baidu BOS File Loader
```python
from langchain_community.document_loaders.baiducloud_bos_file import BaiduBOSFileLoader
```
## Vector stores
### Baidu Cloud ElasticSearch VectorSearch

View File

@@ -0,0 +1,18 @@
# bookend.ai
LangChain implements an integration with embeddings provided by [bookend.ai](https://bookend.ai/).
## Installation and Setup
You need to register and get the `API_KEY`
from the [bookend.ai](https://bookend.ai/) website.
## Embedding model
See a [usage example](/docs/integrations/text_embedding/bookend).
```python
from langchain_community.embeddings import BookendEmbeddings
```

View File

@@ -9,8 +9,8 @@ In this package, we make available a number of ways to include Box content in yo
### Installation and setup
```text
%pip install -U langchain-box
```bash
pip install -U langchain-box
```
@@ -31,12 +31,6 @@ In order to integrate with Box, you need a few things:
The good news is that if you are using a free developer account, you are the admin.
[Authorize your app](https://developer.box.com/guides/authorization/custom-app-approval/#manual-approval)
## Installation
```bash
pip install -U langchain-box
```
## Authentication
The `langchain-box` package offers some flexibility in authentication. The

View File

@@ -83,3 +83,28 @@ from langchain_community.agent_toolkits.cassandra_database.toolkit import (
Learn more in the [example notebook](/docs/integrations/tools/cassandra_database).
Cassandra Database individual tools:
### Get Schema
Tool for getting the schema of a keyspace in an Apache Cassandra database.
```python
from langchain_community.tools import GetSchemaCassandraDatabaseTool
```
### Get Table Data
Tool for getting data from a table in an Apache Cassandra database.
```python
from langchain_community.tools import GetTableDataCassandraDatabaseTool
```
### Query
Tool for querying an Apache Cassandra database with provided CQL.
```python
from langchain_community.tools import QueryCassandraDatabaseTool
```

View File

@@ -0,0 +1,30 @@
# Cerebras
At Cerebras, we've developed the world's largest and fastest AI processor, the Wafer-Scale Engine-3 (WSE-3). The Cerebras CS-3 system, powered by the WSE-3, represents a new class of AI supercomputer that sets the standard for generative AI training and inference with unparalleled performance and scalability.
With Cerebras as your inference provider, you can:
- Achieve unprecedented speed for AI inference workloads
- Build commercially with high throughput
- Effortlessly scale your AI workloads with our seamless clustering technology
Our CS-3 systems can be quickly and easily clustered to create the largest AI supercomputers in the world, making it simple to place and run the largest models. Leading corporations, research institutions, and governments are already using Cerebras solutions to develop proprietary models and train popular open-source models.
Want to experience the power of Cerebras? Check out our [website](https://cerebras.ai) for more resources and explore options for accessing our technology through the Cerebras Cloud or on-premise deployments!
For more information about Cerebras Cloud, visit [cloud.cerebras.ai](https://cloud.cerebras.ai/). Our API reference is available at [inference-docs.cerebras.ai](https://inference-docs.cerebras.ai/).
## Installation and Setup
Install the integration package:
```bash
pip install langchain-cerebras
```
## API Key
Get an API Key from [cloud.cerebras.ai](https://cloud.cerebras.ai/) and add it to your environment variables:
```
export CEREBRAS_API_KEY="your-api-key-here"
```
## Chat Model
See a [usage example](/docs/integrations/chat/cerebras).
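A minimal sketch, assuming `CEREBRAS_API_KEY` is set (the model name below is an assumption):

```python
from langchain_cerebras import ChatCerebras

llm = ChatCerebras(model="llama3.1-8b")  # illustrative model name
print(llm.invoke("Say hello in one sentence.").content)
```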

View File

@@ -0,0 +1,19 @@
# Coze
[Coze](https://www.coze.com/) is an AI chatbot development platform that enables
the creation and deployment of chatbots for handling diverse conversations across
various applications.
## Installation and Setup
First, you need to get the `API_KEY` from the [Coze](https://www.coze.com/) website.
## Chat models
See a [usage example](/docs/integrations/chat/coze/).
```python
from langchain_community.chat_models import ChatCoze
```

View File

@@ -0,0 +1,18 @@
# Dappier AI
> [Dappier](https://platform.dappier.com/) is a platform enabling access to diverse,
> real-time data models. Enhance your AI applications with Dappier's pre-trained,
> LLM-ready data models and ensure accurate, current responses with reduced inaccuracies.
## Installation and Setup
To use one of the `Dappier AI` Data Models, you will need an API key. Visit
[Dappier Platform](https://platform.dappier.com/) to log in and create an API key in your profile.
## Chat models
See a [usage example](/docs/integrations/chat/dappier).
```python
from langchain_community.chat_models import ChatDappierAI
```

View File

@@ -7,7 +7,7 @@
"source": [
"# DSPy\n",
"\n",
"[DSPy](https://github.com/stanfordnlp/dspy) is a fantastic framework for LLMs that introduces an automatic compiler that teaches LMs how to conduct the declarative steps in your program. Specifically, the DSPy compiler will internally trace your program and then craft high-quality prompts for large LMs (or train automatic finetunes for small LMs) to teach them the steps of your task.\n",
">[DSPy](https://github.com/stanfordnlp/dspy) is a fantastic framework for LLMs that introduces an automatic compiler that teaches LMs how to conduct the declarative steps in your program. Specifically, the DSPy compiler will internally trace your program and then craft high-quality prompts for large LMs (or train automatic finetunes for small LMs) to teach them the steps of your task.\n",
"\n",
"Thanks to [Omar Khattab](https://twitter.com/lateinteraction) we have an integration! It works with any LCEL chains with some minor modifications.\n",
"\n",
@@ -17,6 +17,9 @@
"\n",
"Let's take a look at an example. In this example we will make a simple RAG pipeline. We will use DSPy to \"compile\" our program and learn an optimized prompt.\n",
"\n",
"This example uses the `ColBERTv2` model.\n",
"See the [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](https://arxiv.org/abs/2112.01488) paper.\n",
"\n",
"\n",
"## Install dependencies\n",
"\n",
@@ -1175,7 +1178,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,25 @@
# Embedchain
> [Embedchain](https://github.com/embedchain/embedchain) is a RAG framework to create
> data pipelines. It loads, indexes, retrieves and syncs all the data.
>
>It is available as an [open source package](https://github.com/embedchain/embedchain)
> and as a [hosted platform solution](https://app.embedchain.ai/).
## Installation and Setup
Install the package using pip:
```bash
pip install embedchain
```
## Retriever
See a [usage example](/docs/integrations/retrievers/embedchain).
```python
from langchain_community.retrievers import EmbedchainRetriever
```

View File

@@ -0,0 +1,17 @@
# Everly AI
> [Everly AI](https://everlyai.xyz/) allows you to run your ML models at scale in the cloud.
> It also provides API access to [several LLM models](https://everlyai.xyz/).
## Installation and Setup
To use `Everly AI`, you will need an API key. Visit
[Everly AI](https://everlyai.xyz/) to create an API key in your profile.
## Chat models
See a [usage example](/docs/integrations/chat/everlyai).
```python
from langchain_community.chat_models import ChatEverlyAI
```

View File

@@ -4,9 +4,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exa Search\n",
"# Exa\n",
"\n",
"Exa's search integration exists in its own [partner package](https://pypi.org/project/langchain-exa/). You can install it with:"
">[Exa](https://exa.ai/) is a knowledge API for AI and developers.\n",
">\n",
"\n",
"## Installation and Setup\n",
"\n",
"`Exa` integration exists in its own [partner package](https://pypi.org/project/langchain-exa/). You can install it with:"
]
},
{
@@ -26,7 +31,9 @@
"\n",
"## Retriever\n",
"\n",
"You can use the [`ExaSearchRetriever`](/docs/integrations/tools/exa_search#using-exasearchretriever) in a standard retrieval pipeline. You can import it as follows"
"You can use the [`ExaSearchRetriever`](/docs/integrations/tools/exa_search#using-exasearchretriever) in a standard retrieval pipeline. You can import it as follows.\n",
"\n",
"See a [usage example](/docs/integrations/tools/exa_search).\n"
]
},
{
@@ -46,7 +53,40 @@
"source": [
"## Tools\n",
"\n",
"You can use Exa as an agent tool as described in the [Exa tool calling docs](/docs/integrations/tools/exa_search#using-the-exa-sdk-as-langchain-agent-tools).\n"
"You can use Exa as an agent tool as described in the [Exa tool calling docs](/docs/integrations/tools/exa_search#using-the-exa-sdk-as-langchain-agent-tools).\n",
"\n",
"See a [usage example](/docs/integrations/tools/exa_search).\n",
"\n",
"### ExaFindSimilarResults\n",
"\n",
"A tool that queries the Metaphor Search API and gets back JSON."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_exa.tools import ExaFindSimilarResults"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ExaSearchResults\n",
"\n",
"Exa Search tool."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_exa.tools import ExaSearchResults"
]
}
],
@@ -69,9 +109,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}

View File

@@ -1,7 +1,9 @@
# Fireworks
# Fireworks AI
>[Fireworks AI](https://fireworks.ai) is a generative AI inference platform to run and
> customize models with industry-leading speed and production-readiness.
This page covers how to use [Fireworks](https://fireworks.ai/) models within
LangChain.
## Installation and setup
@@ -14,7 +16,7 @@ Langchain.
- Get a Fireworks API key by signing up at [fireworks.ai](https://fireworks.ai).
- Authenticate by setting the FIREWORKS_API_KEY environment variable.
## Authentication
### Authentication
There are two ways to authenticate using your Fireworks API key:
@@ -29,20 +31,26 @@ There are two ways to authenticate using your Fireworks API key:
```python
llm = Fireworks(api_key="<KEY>")
```
## Chat models
## Using the Fireworks LLM module
See a [usage example](/docs/integrations/chat/fireworks).
Fireworks integrates with Langchain through the LLM module. In this example, we
will work the mixtral-8x7b-instruct model.
```python
from langchain_fireworks import ChatFireworks
```
## LLMs
See a [usage example](/docs/integrations/llms/fireworks).
```python
from langchain_fireworks import Fireworks
llm = Fireworks(
api_key="<KEY>",
model="accounts/fireworks/models/mixtral-8x7b-instruct",
max_tokens=256)
llm("Name 3 sports.")
```
For a more detailed walkthrough, see [here](/docs/integrations/llms/Fireworks).
## Embedding models
See a [usage example](/docs/integrations/text_embedding/fireworks).
```python
from langchain_fireworks import FireworksEmbeddings
```

View File

@@ -1,16 +1,19 @@
# ForefrontAI
# Forefront AI
> [Forefront AI](https://forefront.ai/) is a platform enabling you to
> fine-tune and inference open-source text generation models
This page covers how to use the ForefrontAI ecosystem within LangChain.
It is broken into two parts: installation and setup, and then references to specific ForefrontAI wrappers.
## Installation and Setup
- Get an ForefrontAI api key and set it as an environment variable (`FOREFRONTAI_API_KEY`)
## Wrappers
Get a `ForefrontAI` API key by
visiting [this page](https://accounts.forefront.ai/sign-in?redirect_url=https%3A%2F%2Fforefront.ai%2Fapp%2Fapi-keys)
and set it as an environment variable (`FOREFRONTAI_API_KEY`).
### LLM
## LLM
See a [usage example](/docs/integrations/llms/forefrontai).
There exists an ForefrontAI LLM wrapper, which you can access with
```python
from langchain_community.llms import ForefrontAI
```

View File

@@ -0,0 +1,31 @@
# Friendli AI
>[FriendliAI](https://friendli.ai/) enhances AI application performance and optimizes
> cost savings with scalable, efficient deployment options, tailored for high-demand AI workloads.
## Installation and setup
Install the `friendli-client` python package.
```bash
pip install friendli-client
```
Sign in to [Friendli Suite](https://suite.friendli.ai/) to create a Personal Access Token,
and set it as the `FRIENDLI_TOKEN` environment variable.
## Chat models
See a [usage example](/docs/integrations/chat/friendli).
```python
from langchain_community.chat_models.friendli import ChatFriendli
```
## LLMs
See a [usage example](/docs/integrations/llms/friendli).
```python
from langchain_community.llms.friendli import Friendli
```
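A minimal sketch, assuming `FRIENDLI_TOKEN` is set (the model name below is an assumption; pick any model available in your Friendli Suite):

```python
from langchain_community.llms.friendli import Friendli

llm = Friendli(model="mixtral-8x7b-instruct-v0-1")  # illustrative model name
print(llm.invoke("Name three wine regions in France."))
```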

View File

@@ -20,3 +20,26 @@ See a [usage example](/docs/integrations/document_loaders/github).
```python
from langchain_community.document_loaders import GitHubIssuesLoader, GithubFileLoader
```
## Tools/Toolkit
### GitHubToolkit
The `GitHub` toolkit contains tools that enable an LLM agent to interact
with a GitHub repository.
The toolkit is a wrapper for the `PyGitHub` library.
```python
from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit
```
Learn more in the [example notebook](/docs/integrations/tools/github).
### GitHubAction
Tool for interacting with the GitHub API.
```python
from langchain_community.tools.github.tool import GitHubAction
```

View File

@@ -0,0 +1,31 @@
# GitLab
>[GitLab Inc.](https://about.gitlab.com/) is an open-core company
> that operates `GitLab`, a DevOps software package that can develop,
> secure, and operate software. `GitLab` includes a distributed version
> control based on Git, including features such as access control, bug tracking,
> software feature requests, task management, and wikis for every project,
> as well as snippets.
## Tools/Toolkits
### GitLabToolkit
The `GitLab` toolkit contains tools that enable an LLM agent to interact with a GitLab repository.
The toolkit is a wrapper for the `python-gitlab` library.
See a [usage example](/docs/integrations/tools/gitlab).
```python
from langchain_community.agent_toolkits.gitlab.toolkit import GitLabToolkit
```
### GitLabAction
Tool for interacting with the GitLab API.
```python
from langchain_community.tools.gitlab.tool import GitLabAction
```

View File

@@ -1,9 +1,13 @@
# GooseAI
This page covers how to use the GooseAI ecosystem within LangChain.
It is broken into two parts: installation and setup, and then references to specific GooseAI wrappers.
>[GooseAI](https://goose.ai) makes deploying NLP services easier and more accessible.
> `GooseAI` is a fully managed inference service delivered via API.
> With feature parity to other well known APIs, `GooseAI` delivers a plug-and-play solution
> for serving open source language models at the industry's best economics by simply
> changing 2 lines in your code.
## Installation and Setup
- Install the Python SDK with `pip install openai`
- Get your GooseAI api key from this link [here](https://goose.ai/).
- Set the environment variable (`GOOSEAI_API_KEY`).
@@ -13,11 +17,11 @@ import os
os.environ["GOOSEAI_API_KEY"] = "YOUR_API_KEY"
```
## Wrappers
### LLM
## LLMs
See a [usage example](/docs/integrations/llms/gooseai).
There exists an GooseAI LLM wrapper, which you can access with:
```python
from langchain_community.llms import GooseAI
```

View File

@@ -1,17 +1,20 @@
# Groq
Welcome to Groq! 🚀 At Groq, we've developed the world's first Language Processing Unit™, or LPU. The Groq LPU has a deterministic, single core streaming architecture that sets the standard for GenAI inference speed with predictable and repeatable performance for any given workload.
Beyond the architecture, our software is designed to empower developers like you with the tools you need to create innovative, powerful AI applications. With Groq as your engine, you can:
* Achieve uncompromised low latency and performance for real-time AI and HPC inferences 🔥
* Know the exact performance and compute time for any given workload 🔮
* Take advantage of our cutting-edge technology to stay ahead of the competition 💪
Want more Groq? Check out our [website](https://groq.com) for more resources and join our [Discord community](https://discord.gg/JvNsBDKeCG) to connect with our developers!
>[Groq](https://groq.com) developed the world's first Language Processing Unit™, or `LPU`.
> The `Groq LPU` has a deterministic, single core streaming architecture that sets the standard
> for GenAI inference speed with predictable and repeatable performance for any given workload.
>
>Beyond the architecture, `Groq` software is designed to empower developers like you with
> the tools you need to create innovative, powerful AI applications.
>
>With Groq as your engine, you can:
>* Achieve uncompromised low latency and performance for real-time AI and HPC inferences 🔥
>* Know the exact performance and compute time for any given workload 🔮
>* Take advantage of our cutting-edge technology to stay ahead of the competition 💪
## Installation and Setup
Install the integration package:
```bash
@@ -24,5 +27,10 @@ Request an [API key](https://wow.groq.com) and set it as an environment variable
export GROQ_API_KEY=gsk_...
```
## Chat Model
## Chat models
See a [usage example](/docs/integrations/chat/groq).
```python
from langchain_groq import ChatGroq
```
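A minimal sketch, assuming `GROQ_API_KEY` is set (the model name below is an assumption):

```python
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama-3.1-8b-instant")  # illustrative model name
print(llm.invoke("Explain what an LPU is in one sentence.").content)
```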

View File

@@ -0,0 +1,37 @@
# LiteLLM
>[LiteLLM](https://docs.litellm.ai/docs/) is a library that simplifies calling Anthropic,
> Azure, Huggingface, Replicate, etc. LLMs in a unified way.
>
>You can use `LiteLLM` through either:
>
>* [LiteLLM Proxy Server](https://docs.litellm.ai/docs/#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects
>* [LiteLLM python SDK](https://docs.litellm.ai/docs/#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking
## Installation and setup
Install the `litellm` python package.
```bash
pip install litellm
```
## Chat models
### ChatLiteLLM
See a [usage example](/docs/integrations/chat/litellm).
```python
from langchain_community.chat_models import ChatLiteLLM
```
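A minimal sketch, assuming the underlying provider's API key (e.g. `OPENAI_API_KEY`) is set; the model name is an assumption:

```python
from langchain_community.chat_models import ChatLiteLLM

chat = ChatLiteLLM(model="gpt-3.5-turbo")  # any model LiteLLM can route to
print(chat.invoke("Hello!").content)
```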
### ChatLiteLLMRouter
You can also use the `ChatLiteLLMRouter` to route requests to different LLMs or LLM providers.
See a [usage example](/docs/integrations/chat/litellm_router).
```python
from langchain_community.chat_models import ChatLiteLLMRouter
```

View File

@@ -21,9 +21,24 @@ model = Predibase(
model="mistral-7b",
predibase_api_key=os.environ.get("PREDIBASE_API_TOKEN"),
predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)
"""
Optionally use `model_kwargs` to set new default "generate()" settings. For example:
{
"api_token": os.environ.get("HUGGING_FACE_HUB_TOKEN"),
"max_new_tokens": 5, # default is 256
}
"""
**model_kwargs,
)
response = model.invoke("Can you recommend me a nice dry wine?")
"""
Optionally use `kwargs` to dynamically overwrite "generate()" settings. For example:
{
"temperature": 0.5, # default is the value in model_kwargs or 0.1 (initialization default)
"max_new_tokens": 1024, # default is the value in model_kwargs or 256 (initialization default)
}
"""
response = model.invoke("Can you recommend me a nice dry wine?", **kwargs)
print(response)
```
@@ -42,9 +57,24 @@ model = Predibase(
predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)
adapter_id="e2e_nlg",
adapter_version=1,
"""
Optionally use `model_kwargs` to set new default "generate()" settings. For example:
{
"api_token": os.environ.get("HUGGING_FACE_HUB_TOKEN"),
"max_new_tokens": 5, # default is 256
}
"""
**model_kwargs,
)
response = model.invoke("Can you recommend me a nice dry wine?")
"""
Optionally use `kwargs` to dynamically overwrite "generate()" settings. For example:
{
"temperature": 0.5, # default is the value in model_kwargs or 0.1 (initialization default)
"max_new_tokens": 1024, # default is the value in model_kwargs or 256 (initialization default)
}
"""
response = model.invoke("Can you recommend me a nice dry wine?", **kwargs)
print(response)
```
@@ -62,8 +92,23 @@ model = Predibase(
predibase_api_key=os.environ.get("PREDIBASE_API_TOKEN"),
predibase_sdk_version=None, # optional parameter (defaults to the latest Predibase SDK version if omitted)
adapter_id="predibase/e2e_nlg",
"""
Optionally use `model_kwargs` to set new default "generate()" settings. For example:
{
"api_token": os.environ.get("HUGGING_FACE_HUB_TOKEN"),
"max_new_tokens": 5, # default is 256
}
"""
**model_kwargs,
)
response = model.invoke("Can you recommend me a nice dry wine?")
"""
Optionally use `kwargs` to dynamically overwrite "generate()" settings. For example:
{
"temperature": 0.5, # default is the value in model_kwargs or 0.1 (initialization default)
"max_new_tokens": 1024, # default is the value in model_kwargs or 256 (initialization default)
}
"""
response = model.invoke("Can you recommend me a nice dry wine?", **kwargs)
print(response)
```

View File

@@ -7,7 +7,9 @@
"source": [
"# RAGatouille\n",
"\n",
"[RAGatouille](https://github.com/bclavie/RAGatouille) makes it as simple as can be to use ColBERT! [ColBERT](https://github.com/stanford-futuredata/ColBERT) is a fast and accurate retrieval model, enabling scalable BERT-based search over large text collections in tens of milliseconds.\n",
">[RAGatouille](https://github.com/bclavie/RAGatouille) makes it as simple as can be to use `ColBERT`! [ColBERT](https://github.com/stanford-futuredata/ColBERT) is a fast and accurate retrieval model, enabling scalable BERT-based search over large text collections in tens of milliseconds.\n",
">\n",
">See the [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](https://arxiv.org/abs/2112.01488) paper.\n",
"\n",
"There are multiple ways that we can use RAGatouille.\n",
"\n",
@@ -258,7 +260,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,25 @@
# rank_bm25
[rank_bm25](https://github.com/dorianbrown/rank_bm25) is an open-source collection of algorithms
designed to query documents and return the most relevant ones, commonly used for creating
search engines.
See its [project page](https://github.com/dorianbrown/rank_bm25) for available algorithms.
## Installation and Setup
First, you need to install `rank_bm25` python package.
```bash
pip install rank_bm25
```
## Retriever
See a [usage example](/docs/integrations/retrievers/bm25).
```python
from langchain_community.retrievers import BM25Retriever
```
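A minimal sketch of building a retriever over a few in-memory texts:

```python
from langchain_community.retrievers import BM25Retriever

retriever = BM25Retriever.from_texts(
    ["apples are red", "bananas are yellow", "the sky is blue"]
)
print(retriever.invoke("what color are bananas?"))
```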

View File

@@ -0,0 +1,24 @@
# Yahoo
>[Yahoo (Wikipedia)](https://en.wikipedia.org/wiki/Yahoo) is an American web services provider.
>
> It provides a web portal, search engine Yahoo Search, and related
> services, including `My Yahoo`, `Yahoo Mail`, `Yahoo News`,
> `Yahoo Finance`, `Yahoo Sports` and its advertising platform, `Yahoo Native`.
## Tools
### Yahoo Finance News
We have to install a python package:
```bash
pip install yfinance
```
See a [usage example](/docs/integrations/tools/yahoo_finance_news).
```python
from langchain_community.tools import YahooFinanceNewsTool
```
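A minimal sketch; the tool takes a company ticker symbol as its query:

```python
from langchain_community.tools import YahooFinanceNewsTool

tool = YahooFinanceNewsTool()
print(tool.invoke("NVDA"))  # latest news for the NVDA ticker
```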

View File

@@ -31,3 +31,26 @@ See a [usage example](/docs/integrations/chat/yandex).
```python
from langchain_community.chat_models import ChatYandexGPT
```
## Embedding models
### YandexGPT
See a [usage example](/docs/integrations/text_embedding/yandex).
```python
from langchain_community.embeddings import YandexGPTEmbeddings
```
## Parser
### YandexSTTParser
It transcribes and parses audio files.
`YandexSTTParser` is similar to the `OpenAIWhisperParser`.
See a [usage example with OpenAIWhisperParser](/docs/integrations/document_loaders/youtube_audio).
```python
from langchain_community.document_loaders import YandexSTTParser
```

View File

@@ -0,0 +1,17 @@
# Yellowbrick
>[Yellowbrick](https://yellowbrick.com/) is a provider of
> Enterprise Data Warehousing, Ad-hoc and Streaming Analytics,
> BI and AI workloads.
## Vector store
We have to install a python package:
```bash
pip install psycopg2
```
```python
from langchain_community.vectorstores import Yellowbrick
```

View File

@@ -0,0 +1,19 @@
# You
>[You](https://you.com/about) is a company that provides an AI productivity platform.
## Retriever
See a [usage example](/docs/integrations/retrievers/you-retriever).
```python
from langchain_community.retrievers.you import YouRetriever
```
## Tools
See a [usage example](/docs/integrations/tools/you).
```python
from langchain_community.tools.you import YouSearchTool
```

View File

@@ -17,7 +17,7 @@
"source": [
"# BoxRetriever\n",
"\n",
"This will help you getting started with the Box [retriever](/docs/concepts/#retrievers). For detailed documentation of all BoxRetriever features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.Box.BoxRetriever.html).\n",
"This will help you getting started with the Box [retriever](/docs/concepts/#retrievers). For detailed documentation of all BoxRetriever features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.box.BoxRetriever.html).\n",
"\n",
"# Overview\n",
"\n",
@@ -35,7 +35,7 @@
"\n",
"| Retriever | Self-host | Cloud offering | Package |\n",
"| :--- | :--- | :---: | :---: |\n",
"[BoxRetriever](https://python.langchain.com/v0.2/api_reference/langchain-box/retrievers/langchain-box.retrievers.langchain_box.BoxRetriever.html) | ❌ | ✅ | langchain-box |\n",
"[BoxRetriever](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.box.BoxRetriever.html) | ❌ | ✅ | langchain-box |\n",
"\n",
"## Setup\n",
"\n",
@@ -290,7 +290,7 @@
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all BoxRetriever features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.Box.BoxRetriever.html).\n",
"For detailed documentation of all BoxRetriever features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.box.BoxRetriever.html).\n",
"\n",
"\n",
"## Help\n",

View File

@@ -11,6 +11,8 @@
">[RAGatouille](https://github.com/bclavie/RAGatouille) makes it as simple as can be to use `ColBERT`!\n",
">\n",
">[ColBERT](https://github.com/stanford-futuredata/ColBERT) is a fast and accurate retrieval model, enabling scalable BERT-based search over large text collections in tens of milliseconds.\n",
">\n",
">See the [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](https://arxiv.org/abs/2112.01488) paper.\n",
"\n",
"We can use this as a [retriever](/docs/how_to#retrievers). It will show functionality specific to this integration. After going through, it may be useful to explore [relevant use-case pages](/docs/how_to#qa-with-rag) to learn how to use this vector store as part of a larger chain.\n",
"\n",

View File

@@ -0,0 +1,403 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Neo4j\n",
"\n",
">[Neo4j](https://neo4j.com/docs/) is a graph database that stores nodes and relationships, that also supports native vector search.\n",
"\n",
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a `Neo4j` vector store. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating a Neo4j vector store\n",
"First we'll want to create a Neo4j vector store and seed it with some data. We've created a small demo set of documents that contain summaries of movies."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: neo4j in /Users/moyi/git/langchain/env/lib/python3.11/site-packages (5.24.0)\n",
"Requirement already satisfied: pytz in /Users/moyi/git/langchain/env/lib/python3.11/site-packages (from neo4j) (2024.1)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade neo4j"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"OpenAI API Key: ········\n"
]
}
],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Neo4j URL: ········\n",
"Neo4j User Name: ········\n",
"Neo4j Password: ········\n"
]
}
],
"source": [
"# To run this notebook, you can set up a free neo4j account on neo4j.com and input the following information.\n",
"# (If you are having trouble connecting to the database, try using neo4j+ssc: instead of neo4j+s)\n",
"\n",
"os.environ[\"NEO4J_URI\"] = getpass.getpass(\"Neo4j URL:\")\n",
"os.environ[\"NEO4J_USERNAME\"] = getpass.getpass(\"Neo4j User Name:\")\n",
"os.environ[\"NEO4J_PASSWORD\"] = getpass.getpass(\"Neo4j Password:\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.vectorstores import Neo4jVector\n",
"from langchain_core.documents import Document\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: CALL subquery without a variable scope clause is now deprecated. Use CALL (row) { ... }} {position: line: 1, column: 21, offset: 20} for query: \"UNWIND $data AS row CALL { WITH row MERGE (c:`Chunk` {id: row.id}) WITH c, row CALL db.create.setNodeVectorProperty(c, 'embedding', row.embedding) SET c.`text` = row.text SET c += row.metadata } IN TRANSACTIONS OF 1000 ROWS \"\n"
]
}
],
"source": [
"docs = [\n",
" Document(\n",
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
" metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
" ),\n",
" Document(\n",
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
" ),\n",
" Document(\n",
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
" ),\n",
" Document(\n",
" page_content=\"Toys come alive and have a blast doing so\",\n",
" metadata={\"year\": 1995, \"genre\": \"animated\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
" metadata={\n",
" \"year\": 1979,\n",
" \"director\": \"Andrei Tarkovsky\",\n",
" \"genre\": \"science fiction\",\n",
" \"rating\": 9.9,\n",
" },\n",
" ),\n",
"]\n",
"vectorstore = Neo4jVector.from_documents(docs, embeddings)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating our self-querying retriever\n",
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",
"metadata_field_info = [\n",
" AttributeInfo(\n",
" name=\"genre\",\n",
" description=\"The genre of the movie\",\n",
" type=\"string or list[string]\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"year\",\n",
" description=\"The year the movie was released\",\n",
" type=\"integer\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"director\",\n",
" description=\"The name of the movie director\",\n",
" type=\"string\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
" ),\n",
"]\n",
"document_content_description = \"Brief summary of a movie\"\n",
"llm = OpenAI(temperature=0)\n",
"retriever = SelfQueryRetriever.from_llm(\n",
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Testing it out\n",
"And now we can try actually using our retriever!"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'genre': 'science fiction', 'year': 1993, 'rating': 7.7}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
" Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so'),\n",
" Document(metadata={'genre': 'science fiction', 'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
" Document(metadata={'year': 2006, 'rating': 8.6, 'director': 'Satoshi Kon'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example only specifies a relevant query\n",
"retriever.invoke(\"What are some movies about dinosaurs\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'genre': 'science fiction', 'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
" Document(metadata={'year': 2006, 'rating': 8.6, 'director': 'Satoshi Kon'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example only specifies a filter\n",
"retriever.invoke(\"I want to watch a movie rated higher than 8.5\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'year': 2019, 'rating': 8.3, 'director': 'Greta Gerwig'}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them')]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example specifies a query and a filter\n",
"retriever.invoke(\"Has Greta Gerwig directed any movies about women\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'year': 2006, 'rating': 8.6, 'director': 'Satoshi Kon'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),\n",
" Document(metadata={'genre': 'science fiction', 'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky'}, page_content='Three men walk into the Zone, three men walk out of the Zone')]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example specifies a composite filter\n",
"retriever.invoke(\"What's a highly rated (above 8.5) science fiction film?\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so')]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example specifies a query and composite filter\n",
"retriever.invoke(\n",
" \"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filter k\n",
"\n",
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
"\n",
"We can do this by passing `enable_limit=True` to the constructor."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"retriever = SelfQueryRetriever.from_llm(\n",
" llm,\n",
" vectorstore,\n",
" document_content_description,\n",
" metadata_field_info,\n",
" enable_limit=True,\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'genre': 'science fiction', 'year': 1993, 'rating': 7.7}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
" Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so')]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example only specifies a relevant query\n",
"retriever.invoke(\"what are two movies about dinosaurs\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
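
The self-query examples above work because the LLM first translates each natural-language request into a structured query, which is then compiled into a metadata filter for the vector store. As a rough illustration only (this is not part of the notebook above, and the actual LLM output may differ), the intermediate representation for the "highly rated science fiction" request might look like the following sketch, assuming the query-constructor IR classes exported by `langchain.chains.query_constructor.ir`:

```python
# Hedged sketch of the intermediate representation a self-query retriever builds
# before translating it into a vector-store filter. The exact parse is illustrative.
from langchain.chains.query_constructor.ir import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
)

# "What's a highly rated (above 8.5) science fiction film?" could be parsed roughly as:
structured_query = StructuredQuery(
    query="science fiction film",
    filter=Operation(
        operator=Operator.AND,
        arguments=[
            Comparison(comparator=Comparator.EQ, attribute="genre", value="science fiction"),
            Comparison(comparator=Comparator.GT, attribute="rating", value=8.5),
        ],
    ),
    limit=None,  # populated when enable_limit=True and the query asks for "two movies", etc.
)
print(structured_query)
```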

View File

@@ -1,22 +1,34 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "raw",
"id": "afaf8039",
"metadata": {},
"source": [
"# Databricks\n",
"---\n",
"sidebar_label: Databricks\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "9a3d6f34",
"metadata": {},
"source": [
"# DatabricksEmbeddings\n",
"\n",
"> [Databricks](https://www.databricks.com/) Lakehouse Platform unifies data, analytics, and AI on one platform.\n",
"\n",
"This notebook provides a quick overview for getting started with Databricks [embedding models](/docs/concepts/#embedding-models). For detailed documentation of all DatabricksEmbeddings features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.databricks.DatabricksEmbeddings.html).\n",
"This notebook provides a quick overview for getting started with Databricks [embedding models](/docs/concepts/#embedding-models). For detailed documentation of all `DatabricksEmbeddings` features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.databricks.DatabricksEmbeddings.html).\n",
"\n",
"\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"`DatabricksEmbeddings` class wraps an embedding model endpoint hosted on [Databricks Model Serving](https://docs.databricks.com/en/machine-learning/model-serving/index.html). This example notebook shows how to wrap your serving endpoint and use it as a embedding model in your LangChain application.\n",
"\n",
"| Class | Package |\n",
"| :--- | :--- |\n",
"| [DatabricksEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_databricks.embeddings.DatabricksEmbeddings.html) | [langchain-databricks](https://api.python.langchain.com/en/latest/databricks_api_reference.html) |\n",
"\n",
"### Supported Methods\n",
"\n",
@@ -30,13 +42,9 @@
"1. Foundation Models - Curated list of state-of-the-art foundation models such as BAAI General Embedding (BGE). These endpoint are ready to use in your Databricks workspace without any set up.\n",
"2. Custom Models - You can also deploy custom embedding models to a serving endpoint via MLflow with\n",
"your choice of framework such as LangChain, Pytorch, Transformers, etc.\n",
"3. External Models - Databricks endpoints can serve models that are hosted outside Databricks as a proxy, such as proprietary model service like OpenAI text-embedding-3.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"3. External Models - Databricks endpoints can serve models that are hosted outside Databricks as a proxy, such as proprietary model service like OpenAI text-embedding-3.\n",
"\n",
"\n",
"## Setup\n",
"\n",
"To access Databricks models you'll need to create a Databricks account, set up credentials (only if you are outside Databricks workspace), and install required packages.\n",
@@ -51,6 +59,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "36521c2a",
"metadata": {},
"outputs": [],
"source": [
@@ -63,33 +72,27 @@
},
{
"cell_type": "markdown",
"id": "d9664366",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"The LangChain Databricks integration lives in the `langchain-community` package. Also, `mlflow >= 2.9 ` is required to run the code in this notebook."
"The LangChain Databricks integration lives in the `langchain-databricks` package:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64853226",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-community mlflow>=2.9.0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We first demonstrates how to query BGE model hosted as Foundation Models endpoint with `DatabricksEmbeddings`.\n",
"\n",
"For other type of endpoints, there are some difference in how to set up the endpoint itself, however, once the endpoint is ready, there is no difference in how to query it."
"%pip install -qU langchain-databricks"
]
},
{
"cell_type": "markdown",
"id": "45dd1724",
"metadata": {},
"source": [
"## Instantiation"
@@ -98,10 +101,11 @@
{
"cell_type": "code",
"execution_count": null,
"id": "9ea7a09b",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.embeddings import DatabricksEmbeddings\n",
"from langchain_databricks import DatabricksEmbeddings\n",
"\n",
"embeddings = DatabricksEmbeddings(\n",
" endpoint=\"databricks-bge-large-en\",\n",
@@ -113,65 +117,131 @@
},
{
"cell_type": "markdown",
"id": "77d271b6",
"metadata": {},
"source": [
"## Embed single text"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.051055908203125, 0.007221221923828125, 0.003879547119140625]\n"
]
}
],
"source": [
"embeddings.embed_query(\"hello\")[:3]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Embed documents"
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our RAG tutorials under the [working with external knowledge tutorials](/docs/tutorials/#working-with-external-knowledge).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d817716b",
"metadata": {},
"outputs": [],
"source": [
"documents = [\"This is a dummy document.\", \"This is another dummy document.\"]\n",
"response = embeddings.embed_documents(documents)\n",
"print([e[:3] for e in response]) # Show first 3 elements of each embedding"
"# Create a vector store with a sample text\n",
"from langchain_core.vectorstores import InMemoryVectorStore\n",
"\n",
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
"\n",
"vectorstore = InMemoryVectorStore.from_texts(\n",
" [text],\n",
" embedding=embeddings,\n",
")\n",
"\n",
"# Use the vectorstore as a retriever\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
"# Retrieve the most similar text\n",
"retrieved_document = retriever.invoke(\"What is LangChain?\")\n",
"\n",
"# show the retrieved document's content\n",
"retrieved_document[0].page_content"
]
},
{
"cell_type": "markdown",
"id": "e02b9855",
"metadata": {},
"source": [
"## Wrapping Other Types of Endpoints\n",
"## Direct Usage\n",
"\n",
"The example above uses an embedding model hosted as a Foundation Models API. To learn about how to use the other endpoint types, please refer to the documentation for `ChatDatabricks`. While the model type is different, required steps are the same.\n",
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
"\n",
"* [Custom Model Endpoint](https://python.langchain.com/v0.2/docs/integrations/chat/databricks/#wrapping-custom-model-endpoint)\n",
"* [External Models](https://python.langchain.com/v0.2/docs/integrations/chat/databricks/#wrapping-external-models)"
"You can directly call these methods to get embeddings for your own use cases.\n",
"\n",
"### Embed single texts\n",
"\n",
"You can embed single texts or documents with `embed_query`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d2befcd",
"metadata": {},
"outputs": [],
"source": [
"single_vector = embeddings.embed_query(text)\n",
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "1b5a7d03",
"metadata": {},
"source": [
"## API reference\n",
"### Embed multiple texts\n",
"\n",
"For detailed documentation of all ChatDatabricks features and configurations head to the API reference: https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.databricks.DatabricksEmbeddings.html"
"You can embed multiple texts with `embed_documents`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f4d6e97",
"metadata": {},
"outputs": [],
"source": [
"text2 = (\n",
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
")\n",
"two_vectors = embeddings.embed_documents([text, text2])\n",
"for vector in two_vectors:\n",
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "98785c12",
"metadata": {},
"source": [
"### Async Usage\n",
"\n",
"You can also use `aembed_query` and `aembed_documents` for producing embeddings asynchronously:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4c3bef91",
"metadata": {},
"outputs": [],
"source": [
"import asyncio\n",
"\n",
"\n",
"async def async_example():\n",
" single_vector = await embeddings.aembed_query(text)\n",
" print(str(single_vector)[:100]) # Show the first 100 characters of the vector\n",
"\n",
"\n",
"asyncio.run(async_example())"
]
},
{
"cell_type": "markdown",
"id": "0d053b64",
"metadata": {},
"source": [
"## API Reference\n",
"\n",
"For detailed documentation on `DatabricksEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.databricks.DatabricksEmbeddings.html).\n"
]
}
],
@@ -191,9 +261,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.10.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
"nbformat_minor": 5
}

View File

@@ -1,89 +1,307 @@
{
"cells": [
{
"cell_type": "markdown",
"cell_type": "raw",
"id": "afaf8039",
"metadata": {},
"source": [
"# Google Vertex AI PaLM \n",
"---\n",
"sidebar_label: Google Vertex AI \n",
"keywords: [Vertex AI, vertexai , Google Cloud, embeddings]\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "9a3d6f34",
"metadata": {},
"source": [
"# Google Vertex AI Embeddings \n",
"\n",
">[Vertex AI PaLM API](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) is a service on Google Cloud exposing the embedding models. \n",
"This will help you get started with Google Vertex AI Embeddings models using LangChain. For detailed documentation on `Google Vertex AI Embeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/v0.2/api_reference/google_vertexai/embeddings/langchain_google_vertexai.embeddings.VertexAIEmbeddings.html).\n",
"\n",
"Note: This integration is separate from the Google PaLM integration.\n",
"## Overview\n",
"### Integration details\n",
"\n",
"By default, Google Cloud [does not use](https://cloud.google.com/vertex-ai/docs/generative-ai/data-governance#foundation_model_development) Customer Data to train its foundation models as part of Google Cloud`s AI/ML Privacy Commitment. More details about how Google processes data can also be found in [Google's Customer Data Processing Addendum (CDPA)](https://cloud.google.com/terms/data-processing-addendum).\n",
"| Provider | Package |\n",
"|:--------:|:-------:|\n",
"| [Google](https://python.langchain.com/v0.2/docs/integrations/platforms/google/) | [langchain-google-vertexai](https://python.langchain.com/v0.2/api_reference/google_vertexai/embeddings/langchain_google_vertexai.embeddings.VertexAIEmbeddings.html) |\n",
"\n",
"To use Vertex AI PaLM you must have the `langchain-google-vertexai` Python package installed and either:\n",
"- Have credentials configured for your environment (gcloud, workload identity, etc...)\n",
"- Store the path to a service account JSON file as the GOOGLE_APPLICATION_CREDENTIALS environment variable\n",
"## Setup\n",
"\n",
"This codebase uses the `google.auth` library which first looks for the application credentials variable mentioned above, and then looks for system-level auth.\n",
"To access Google Vertex AI Embeddings models you'll need to \n",
"- Create a Google Cloud account \n",
"- Install the `langchain-google-vertexai` integration package.\n",
"\n",
"For more information, see: \n",
"- https://cloud.google.com/docs/authentication/application-default-credentials#GAC\n",
"- https://googleapis.dev/python/google-auth/latest/reference/google.auth.html#module-google.auth\n",
"\n"
"\n",
"\n",
"\n",
"### Credentials\n",
"\n",
"\n",
"Head to [Google Cloud](https://cloud.google.com/free/) to sign up to create an account. Once you've done this set the GOOGLE_APPLICATION_CREDENTIALS environment variable:\n",
"\n",
"For more information, see:\n",
"\n",
"https://cloud.google.com/docs/authentication/application-default-credentials#GAC\n",
"https://googleapis.dev/python/google-auth/latest/reference/google.auth.html#module-google.auth"
]
},
{
"cell_type": "markdown",
"id": "caaba519-3476-423b-a5e4-d99a10929506",
"metadata": {},
"source": [
"**OPTIONAL : Authenticate your notebook environment (Colab only)**\n",
"\n",
"If you're running this notebook on Google Colab, run the cell below to authenticate your environment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"id": "b0770000-3667-439b-8c46-acc5af7c8e40",
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain langchain-google-vertexai"
"import sys\n",
"\n",
"if \"google.colab\" in sys.modules:\n",
" from google.colab import auth\n",
"\n",
" auth.authenticate_user()"
]
},
{
"cell_type": "markdown",
"id": "9fbd4a33-2480-4ad1-8d56-aec730b3662b",
"metadata": {},
"source": [
"**Set Google Cloud project information and initialize Vertex AI SDK**\n",
"\n",
"To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n",
"\n",
"Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "36521c2a",
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_vertexai import VertexAIEmbeddings"
"PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}\n",
"LOCATION = \"us-central1\" # @param {type:\"string\"}\n",
"\n",
"import vertexai\n",
"\n",
"vertexai.init(project=PROJECT_ID, location=LOCATION)"
]
},
{
"cell_type": "markdown",
"id": "c84fb993",
"metadata": {},
"source": [
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39a4953b",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
]
},
{
"cell_type": "markdown",
"id": "d9664366",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"The LangChain Google Vertex AI Embeddings integration lives in the `langchain-google-vertexai` package:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "64853226",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-google-vertexai"
]
},
{
"cell_type": "markdown",
"id": "45dd1724",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our model object and generate embeddings:\n",
">Check the list of [Supported Models](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#supported-models)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9ea7a09b",
"metadata": {},
"outputs": [],
"source": [
"embeddings = VertexAIEmbeddings()"
"from langchain_google_vertexai import VertexAIEmbeddings\n",
"\n",
"# Initialize the a specific Embeddings Model version\n",
"embeddings = VertexAIEmbeddings(model_name=\"text-embedding-004\")"
]
},
{
"cell_type": "markdown",
"id": "77d271b6",
"metadata": {},
"source": [
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our RAG tutorials under the [working with external knowledge tutorials](/docs/tutorials/#working-with-external-knowledge).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d817716b",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'LangChain is the framework for building context-aware reasoning applications'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"text = \"This is a test document.\""
"# Create a vector store with a sample text\n",
"from langchain_core.vectorstores import InMemoryVectorStore\n",
"\n",
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
"\n",
"vectorstore = InMemoryVectorStore.from_texts(\n",
" [text],\n",
" embedding=embeddings,\n",
")\n",
"\n",
"# Use the vectorstore as a retriever\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
"# Retrieve the most similar text\n",
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
"\n",
"# show the retrieved document's content\n",
"retrieved_documents[0].page_content"
]
},
{
"cell_type": "markdown",
"id": "e02b9855",
"metadata": {},
"source": [
"## Direct Usage\n",
"\n",
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
"\n",
"You can directly call these methods to get embeddings for your own use cases.\n",
"\n",
"### Embed single texts\n",
"\n",
"You can embed single texts or documents with `embed_query`:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "0d2befcd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.02831101417541504, 0.022063178941607475, -0.07454229146242142, 0.006448323838412762, 0.001955120\n"
]
}
],
"source": [
"single_vector = embeddings.embed_query(text)\n",
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "1b5a7d03",
"metadata": {},
"source": [
"### Embed multiple texts\n",
"\n",
"You can embed multiple texts with `embed_documents`:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2f4d6e97",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.01092718355357647, 0.01213780976831913, -0.05650627985596657, 0.006737854331731796, 0.0085973171\n",
"[0.010135706514120102, 0.01234869472682476, -0.07284046709537506, 0.00027134662377648056, 0.01546290\n"
]
}
],
"source": [
"query_result = embeddings.embed_query(text)"
"text2 = (\n",
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
")\n",
"two_vectors = embeddings.embed_documents([text, text2])\n",
"for vector in two_vectors:\n",
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "code",
"execution_count": 6,
"cell_type": "markdown",
"id": "98785c12",
"metadata": {},
"outputs": [],
"source": [
"doc_result = embeddings.embed_documents([text])"
"## API Reference\n",
"\n",
"For detailed documentation on `Google Vertex AI Embeddings\n",
"` features and configuration options, please refer to the [API reference](https://python.langchain.com/v0.2/api_reference/google_vertexai/embeddings/langchain_google_vertexai.embeddings.VertexAIEmbeddings.html).\n"
]
}
],
"metadata": {
"environment": {
"kernel": "python310",
"name": "tf2-gpu.2-6.m104",
"type": "gcloud",
"uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-6:m104"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -97,14 +315,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"vscode": {
"interpreter": {
"hash": "cc99336516f23363341912c6723b01ace86f02e26b4290be1efc0677e2e2ec24"
}
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
"nbformat_minor": 5
}

View File

@@ -17,9 +17,9 @@
"## Install Prerequisites\n",
"To benefit from IPEX-LLM on Intel GPUs, there are several prerequisite steps for tools installation and environment preparation.\n",
"\n",
"If you are a Windows user, visit the [Install IPEX-LLM on Windows with Intel GPU Guide](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_windows_gpu.html), and follow [Install Prerequisites](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_windows_gpu.html#install-prerequisites) to update GPU driver (optional) and install Conda.\n",
"If you are a Windows user, visit the [Install IPEX-LLM on Windows with Intel GPU Guide](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_windows_gpu.md), and follow [Install Prerequisites](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_windows_gpu.md#install-prerequisites) to update GPU driver (optional) and install Conda.\n",
"\n",
"If you are a Linux user, visit the [Install IPEX-LLM on Linux with Intel GPU](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_linux_gpu.html), and follow [**Install Prerequisites**](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_linux_gpu.html#install-prerequisites) to install GPU driver, Intel® oneAPI Base Toolkit 2024.0, and Conda.\n",
"If you are a Linux user, visit the [Install IPEX-LLM on Linux with Intel GPU](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md), and follow [**Install Prerequisites**](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md#install-prerequisites) to install GPU driver, Intel® oneAPI Base Toolkit 2024.0, and Conda.\n",
"\n",
"## Setup\n",
"\n",
@@ -105,7 +105,7 @@
">\n",
"> For the first time that each model runs on Intel iGPU/Intel Arc A300-Series or Pro A60, it may take several minutes to compile.\n",
">\n",
"> For other GPU type, please refer to [here](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Overview/install_gpu.html#runtime-configuration) for Windows users, and [here](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Overview/install_gpu.html#id5) for Linux users.\n",
"> For other GPU type, please refer to [here](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Overview/install_gpu.md#runtime-configuration) for Windows users, and [here](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Overview/install_gpu.md#runtime-configuration-1) for Linux users.\n",
"\n",
"\n",
"## Basic Usage\n",

View File

@@ -1,121 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "abede47c-6a58-40c3-b7ef-10966a4fc085",
"metadata": {},
"source": [
"# NVIDIA NeMo embeddings"
]
},
{
"cell_type": "markdown",
"id": "38f3d4ce-b36a-48c6-88b0-5970c26bb146",
"metadata": {},
"source": [
"Connect to NVIDIA's embedding service using the `NeMoEmbeddings` class.\n",
"\n",
"The NeMo Retriever Embedding Microservice (NREM) brings the power of state-of-the-art text embedding to your applications, providing unmatched natural language processing and understanding capabilities. Whether you're developing semantic search, Retrieval Augmented Generation (RAG) pipelines—or any application that needs to use text embeddings—NREM has you covered. Built on the NVIDIA software platform incorporating CUDA, TensorRT, and Triton, NREM brings state of the art GPU accelerated Text Embedding model serving.\n",
"\n",
"NREM uses NVIDIA's TensorRT built on top of the Triton Inference Server for optimized inference of text embedding models."
]
},
{
"cell_type": "markdown",
"id": "f5ab6ea1-d074-4f36-ae45-50312a6a82b9",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "32deab16-530d-455c-b40c-914db048cb05",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.embeddings import NeMoEmbeddings"
]
},
{
"cell_type": "markdown",
"id": "de40023c-3391-474d-96cf-fbfb2311e9d7",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "37177018-47f4-48be-8575-83ce5c9a5447",
"metadata": {},
"outputs": [],
"source": [
"batch_size = 16\n",
"model = \"NV-Embed-QA-003\"\n",
"api_endpoint_url = \"http://localhost:8080/v1/embeddings\""
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "08161ed2-8ba3-4226-a387-15c348f8c343",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Checking if endpoint is live: http://localhost:8080/v1/embeddings\n"
]
}
],
"source": [
"embedding_model = NeMoEmbeddings(\n",
" batch_size=batch_size, model=model, api_endpoint_url=api_endpoint_url\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c69070c3-fe2d-4ff7-be4a-73304e2c4f3e",
"metadata": {},
"outputs": [],
"source": [
"embedding_model.embed_query(\"This is a test.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d1d8852-5298-40b5-89c4-5a91ccfc95e5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -39,7 +39,9 @@
"volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run\n",
"\n",
"docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.6 --model-id $model --revision $revision\n",
"```"
"```\n",
"\n",
"Specifics on Docker usage might vary with the underlying hardware. For example, to serve the model on Intel Gaudi/Gaudi2 hardware, refer to the [tei-gaudi repository](https://github.com/huggingface/tei-gaudi) for the relevant docker run command."
]
},
{

View File

@@ -8,6 +8,8 @@
"# Google Cloud Text-to-Speech\n",
"\n",
">[Google Cloud Text-to-Speech](https://cloud.google.com/text-to-speech) enables developers to synthesize natural-sounding speech with 100+ voices, available in multiple languages and variants. It applies DeepMinds groundbreaking research in WaveNet and Googles powerful neural networks to deliver the highest fidelity possible.\n",
">\n",
">It supports multiple languages, including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi.\n",
"\n",
"This notebook shows how to interact with the `Google Cloud Text-to-Speech API` to achieve speech synthesis capabilities."
]
@@ -22,12 +24,38 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "0a309c0e-5310-4eaa-8af9-bcbc252e45da",
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet google-cloud-text-to-speech langchain-community"
"!pip install --upgrade langchain-google-community[texttospeech]"
]
},
{
"cell_type": "markdown",
"id": "5b86ad38-ac8a-4f0a-a492-01a6e3090c8c",
"metadata": {},
"source": [
"## Instantiation"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e2efded2-894b-4683-89ed-2a6948913fa9",
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-01T17:47:31.565221Z",
"iopub.status.busy": "2024-09-01T17:47:31.564804Z",
"iopub.status.idle": "2024-09-01T17:47:31.570600Z",
"shell.execute_reply": "2024-09-01T17:47:31.569764Z",
"shell.execute_reply.started": "2024-09-01T17:47:31.565188Z"
}
},
"outputs": [],
"source": [
"from langchain_google_community import TextToSpeechTool"
]
},
{
@@ -35,18 +63,34 @@
"id": "434b2454-2bff-484d-822c-4026a9dc1383",
"metadata": {},
"source": [
"## Usage"
"## Deprecated GoogleCloudTextToSpeechTool"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2f57a647-9214-4562-a8cf-f263a15d1f40",
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-01T17:51:28.763915Z",
"iopub.status.busy": "2024-09-01T17:51:28.763664Z",
"iopub.status.idle": "2024-09-01T17:51:28.779073Z",
"shell.execute_reply": "2024-09-01T17:51:28.778477Z",
"shell.execute_reply.started": "2024-09-01T17:51:28.763897Z"
}
},
"outputs": [],
"source": [
"from langchain_community.tools import GoogleCloudTextToSpeechTool"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f57a647-9214-4562-a8cf-f263a15d1f40",
"id": "a2647bc5-e494-41f9-9f53-4a278ea30cc1",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.tools import GoogleCloudTextToSpeechTool\n",
"\n",
"text_to_speak = \"Hello world!\"\n",
"\n",
"tts = GoogleCloudTextToSpeechTool()\n",

View File

@@ -1,139 +1,185 @@
{
"cells": [
{
"cell_type": "markdown",
"cell_type": "raw",
"id": "1957f5cb",
"metadata": {},
"source": [
"# Databricks Vector Search\n",
"---\n",
"sidebar_label: Databricks\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "ef1f0986",
"metadata": {},
"source": [
"# DatabricksVectorSearch\n",
"\n",
"Databricks Vector Search is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from Delta tables managed by Unity Catalog and query them with a simple API to return the most similar vectors.\n",
"[Databricks Vector Search](https://docs.databricks.com/en/generative-ai/vector-search.html) is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from Delta tables managed by Unity Catalog and query them with a simple API to return the most similar vectors.\n",
"\n",
"This notebook shows how to use LangChain with Databricks Vector Search."
]
},
{
"cell_type": "markdown",
"id": "36fdc060",
"metadata": {},
"source": [
"Install `databricks-vectorsearch` and related Python packages used in this notebook."
"## Setup\n",
"\n",
"To access Databricks models you'll need to create a Databricks account, set up credentials (only if you are outside Databricks workspace), and install required packages.\n",
"\n",
"### Credentials (only if you are outside Databricks)\n",
"\n",
"If you are running LangChain app inside Databricks, you can skip this step.\n",
"\n",
"Otherwise, you need manually set the Databricks workspace hostname and personal access token to `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables, respectively. See [Authentication Documentation](https://docs.databricks.com/en/dev-tools/auth/index.html#databricks-personal-access-tokens) for how to get an access token."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-core databricks-vectorsearch langchain-openai tiktoken"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use `OpenAIEmbeddings` for the embeddings."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "5fb2788f",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
"os.environ[\"DATABRICKS_HOST\"] = \"https://your-databricks-workspace\"\n",
"os.environ[\"DATABRICKS_TOKEN\"] = getpass.getpass(\"Enter your Databricks access token: \")"
]
},
{
"cell_type": "markdown",
"id": "93df377e",
"metadata": {},
"source": [
"Split documents and get embeddings."
"### Installation\n",
"\n",
"The LangChain Databricks integration lives in the `langchain-databricks` package."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"id": "b03d22f1",
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = OpenAIEmbeddings()\n",
"emb_dim = len(embeddings.embed_query(\"hello\"))"
"%pip install -qU langchain-databricks"
]
},
{
"cell_type": "markdown",
"metadata": {},
"id": "08c6ef75",
"metadata": {
"vscode": {
"languageId": "raw"
}
},
"source": [
"## Setup Databricks Vector Search client"
"### Create a Vector Search Endpoint and Index (if you haven't already)\n",
"\n",
"In this section, we will create a Databricks Vector Search endpoint and an index using the client SDK.\n",
"\n",
"If you already have an endpoint and an index, you can skip the section and go straight to \"Instantiation\" section."
]
},
{
"cell_type": "markdown",
"id": "db62918b",
"metadata": {
"vscode": {
"languageId": "raw"
}
},
"source": [
"First, instantiate the Databricks VectorSearch client:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0f2957b",
"metadata": {},
"outputs": [],
"source": [
"from databricks.vector_search.client import VectorSearchClient\n",
"\n",
"vsc = VectorSearchClient()"
"client = VectorSearchClient()"
]
},
{
"cell_type": "markdown",
"id": "31311046",
"metadata": {},
"source": [
"## Create a Vector Search Endpoint\n",
"This endpoint is used to create and access vector search indexes."
"Next, we will create a new VectorSearch endpoint."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "be8f7d3a",
"metadata": {},
"outputs": [],
"source": [
"vsc.create_endpoint(name=\"vector_search_demo_endpoint\", endpoint_type=\"STANDARD\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Direct Vector Access Index\n",
"Direct Vector Access Index supports direct read and write of embedding vectors and metadata through a REST API or an SDK. For this index, you manage embedding vectors and index updates yourself."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vector_search_endpoint_name = \"vector_search_demo_endpoint\"\n",
"index_name = \"vector_search_demo.vector_search.state_of_the_union_index\"\n",
"endpoint_name = \"<your-endpoint-name>\"\n",
"\n",
"index = vsc.create_direct_access_index(\n",
" endpoint_name=vector_search_endpoint_name,\n",
"client.create_endpoint(name=endpoint_name, endpoint_type=\"STANDARD\")"
]
},
{
"cell_type": "markdown",
"id": "63498435",
"metadata": {},
"source": [
"Lastly, we will create an index that cna be queried on the endpoint. There are two types of indexes in Databricks Vector Search and the `DatabricksVectorSearch` class support both use cases.\n",
"\n",
"* **Delta Sync Index** automatically syncs with a source Delta Table, automatically and incrementally updating the index as the underlying data in the Delta Table changes.\n",
"\n",
"* **Direct Vector Access Index** supports direct read and write of vectors and metadata. The user is responsible for updating this table using the REST API or the Python SDK.\n",
"\n",
"Also for delta-sync index, you can choose to use Databricks-managed embeddings or self-managed embeddings (via LangChain embeddings classes)."
]
},
{
"cell_type": "markdown",
"id": "863d7218",
"metadata": {},
"source": [
"The following code creates a **direct-access** index. Please refer to the [Databricks documentation](https://docs.databricks.com/en/generative-ai/create-query-vector-search.html) for the instruction to create the other type of indexes."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "474aea5c",
"metadata": {},
"outputs": [],
"source": [
"index_name = \"<your-index-name>\" # Format: \"<catalog>.<schema>.<index-name>\"\n",
"\n",
"index = client.create_direct_access_index(\n",
" endpoint_name=endpoint_name,\n",
" index_name=index_name,\n",
" primary_key=\"id\",\n",
" embedding_dimension=emb_dim,\n",
" # Dimension of the embeddings. Please change according to the embedding model you are using.\n",
" embedding_dimension=3072,\n",
" # A column to store the embedding vectors for the text data\n",
" embedding_vector_column=\"text_vector\",\n",
" schema={\n",
" \"id\": \"string\",\n",
" \"text\": \"string\",\n",
" \"text_vector\": \"array<float>\",\n",
" # Optional metadata columns\n",
" \"source\": \"string\",\n",
" },\n",
")\n",
@@ -141,90 +187,333 @@
"index.describe()"
]
},
{
"cell_type": "markdown",
"id": "979bea9b",
"metadata": {
"vscode": {
"languageId": "raw"
}
},
"source": [
"## Instantiation\n",
"\n",
"The instantiation of `DatabricksVectorSearch` is a bit different depending on whether your index uses Databricks-managed embeddings or self-managed embeddings i.e. LangChain Embeddings object of your choice."
]
},
{
"cell_type": "markdown",
"id": "d34c1b01",
"metadata": {
"vscode": {
"languageId": "raw"
}
},
"source": [
"If you are using a delta-sync index with Databricks-managed embeddings:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"id": "dc37144c-208d-4ab3-9f3a-0407a69fe052",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain_community.vectorstores import DatabricksVectorSearch\n",
"from langchain_databricks.vectorstores import DatabricksVectorSearch\n",
"\n",
"dvs = DatabricksVectorSearch(\n",
" index, text_column=\"text\", embedding=embeddings, columns=[\"source\"]\n",
"vector_store = DatabricksVectorSearch(\n",
" endpoint=endpoint_name,\n",
" index_name=index_name,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "f48e4e85",
"metadata": {},
"source": [
"## Add docs to the index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dvs.add_documents(docs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Similarity search\n",
"Optional keyword arguments to similarity_search include specifying k number of documents to retrive, \n",
"a filters dictionary for metadata filtering based on [this syntax](https://docs.databricks.com/en/generative-ai/create-query-vector-search.html#use-filters-on-queries),\n",
"as well as the [query_type](https://api-docs.databricks.com/python/vector-search/databricks.vector_search.html#databricks.vector_search.index.VectorSearchIndex.similarity_search) which can be ANN or HYBRID "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"dvs.similarity_search(query)\n",
"print(docs[0].page_content)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Work with Delta Sync Index\n",
"If you are using a direct-access index or a delta-sync index with self-managed embeddings,\n",
"you also need to provide the embedding model and text column in your source table to\n",
"use for the embeddings:\n",
"\n",
"You can also use `DatabricksVectorSearch` to search in a Delta Sync Index. Delta Sync Index automatically syncs from a Delta table. You don't need to call `add_text`/`add_documents` manually. See [Databricks documentation page](https://docs.databricks.com/en/generative-ai/vector-search.html#delta-sync-index-with-managed-embeddings) for more details."
"```{=mdx}\n",
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
"\n",
"<EmbeddingTabs/>\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec6288a7",
"metadata": {},
"outputs": [],
"source": [
"delta_sync_index = vsc.create_delta_sync_index(\n",
" endpoint_name=vector_search_endpoint_name,\n",
" source_table_name=\"vector_search_demo.vector_search.state_of_the_union\",\n",
" index_name=\"vector_search_demo.vector_search.state_of_the_union_index\",\n",
" pipeline_type=\"TRIGGERED\",\n",
" primary_key=\"id\",\n",
" embedding_source_column=\"text\",\n",
" embedding_model_endpoint_name=\"e5-small-v2\",\n",
"# | output: false\n",
"# | echo: false\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b1bdbdf",
"metadata": {},
"outputs": [],
"source": [
"vector_store = DatabricksVectorSearch(\n",
" endpoint=endpoint_name,\n",
" index_name=index_name,\n",
" embedding=embeddings,\n",
" # The column name in the index that contains the text data to be embedded\n",
" text_column=\"document_content\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "ac6071d4",
"metadata": {},
"source": [
"## Manage vector store\n",
"\n",
"### Add items to vector store\n",
"\n",
"Note: Adding items to vector store via `add_documents` method is only supported for a **direct-access** index."
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "17f5efc0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['1', '2', '3']"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.documents import Document\n",
"\n",
"document_1 = Document(page_content=\"foo\", metadata={\"source\": \"https://example.com\"})\n",
"\n",
"document_2 = Document(page_content=\"bar\", metadata={\"source\": \"https://example.com\"})\n",
"\n",
"document_3 = Document(page_content=\"baz\", metadata={\"source\": \"https://example.com\"})\n",
"\n",
"documents = [document_1, document_2, document_3]\n",
"\n",
"vector_store.add_documents(documents=documents, ids=[\"1\", \"2\", \"3\"])"
]
},
{
"cell_type": "markdown",
"id": "dcf1b905",
"metadata": {},
"source": [
"### Delete items from vector store\n",
"\n",
"Note: Deleting items to vector store via `delete` method is only supported for a **direct-access** index."
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "ef61e188",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vector_store.delete(ids=[\"3\"])"
]
},
{
"cell_type": "markdown",
"id": "c3620501",
"metadata": {},
"source": [
"## Query vector store\n",
"\n",
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
"\n",
"### Query directly\n",
"\n",
"Performing a simple similarity search can be done as follows:"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "aa0a16fa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* foo [{'id': '1'}]\n"
]
}
],
"source": [
"results = vector_store.similarity_search(\n",
" query=\"thud\", k=1, filter={\"source\": \"https://example.com\"}\n",
")\n",
"dvs_delta_sync = DatabricksVectorSearch(delta_sync_index)\n",
"dvs_delta_sync.similarity_search(query)"
"for doc in results:\n",
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
]
},
{
"cell_type": "markdown",
"id": "562056dd",
"metadata": {},
"source": [
"Note: By default, similarity search only returns the primary key and text column. If you want to retrieve the custom metadata associated with the document, pass the additional columns in the `columns` parameter when initializing the vector store."
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "a1c746a2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* foo [{'source': 'https://example.com', 'id': '1'}]\n"
]
}
],
"source": [
"vector_store = DatabricksVectorSearch(\n",
" endpoint=endpoint_name,\n",
" index_name=index_name,\n",
" embedding=embeddings,\n",
" text_column=\"text\",\n",
" columns=[\"source\"],\n",
")\n",
"\n",
"results = vector_store.similarity_search(query=\"thud\", k=1)\n",
"for doc in results:\n",
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
]
},
{
"cell_type": "markdown",
"id": "3ed9d733",
"metadata": {},
"source": [
"If you want to execute a similarity search and receive the corresponding scores you can run:"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "5efd2eaa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* [SIM=0.414035] foo [{'source': 'https://example.com', 'id': '1'}]\n"
]
}
],
"source": [
"results = vector_store.similarity_search_with_score(\n",
" query=\"thud\", k=1, filter={\"source\": \"https://example.com\"}\n",
")\n",
"for doc, score in results:\n",
" print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
]
},
{
"cell_type": "markdown",
"id": "0c235cdc",
"metadata": {},
"source": [
"### Query by turning into retriever\n",
"\n",
"You can also transform the vector store into a retriever for easier usage in your chains. "
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "f3460093",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'source': 'https://example.com', 'id': '1'}, page_content='foo')]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
"retriever.invoke(\"thud\")"
]
},
{
"cell_type": "markdown",
"id": "901c75dc",
"metadata": {},
"source": [
"## Usage for retrieval-augmented generation\n",
"\n",
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
"\n",
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
]
},
{
"cell_type": "markdown",
"id": "8a27244f",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all DatabricksVectorSearch features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_databricks.vectorstores.DatabricksVectorSearch.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "langchain-dev",
"language": "python",
"name": "python3"
"name": "langchain-dev"
},
"language_info": {
"codemirror_mode": {
@@ -236,9 +525,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
"nbformat_minor": 5
}
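
The prose removed in this diff notes that `similarity_search` also accepts a `query_type` keyword that can be "ANN" or "HYBRID". Assuming the `langchain-databricks` implementation exposes the same keyword (this is an assumption, not confirmed by the notebook above), a hybrid keyword-plus-vector search over the index configured above might look like this sketch:

```python
# Hedged sketch: query_type is taken from the removed prose in this notebook and is assumed
# to be supported by the langchain-databricks similarity_search method.
results = vector_store.similarity_search(
    query="thud",
    k=1,
    filter={"source": "https://example.com"},
    query_type="HYBRID",  # assumed keyword; default behavior is approximate nearest neighbor ("ANN")
)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")
```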

View File

@@ -479,8 +479,6 @@
"\n",
"```python\n",
"#!/usr/bin/env python\n",
"from typing import List\n",
"\n",
"from fastapi import FastAPI\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.output_parsers import StrOutputParser\n",
@@ -512,7 +510,6 @@
")\n",
"\n",
"# 5. Adding chain route\n",
"\n",
"add_routes(\n",
" app,\n",
" chain,\n",

View File

@@ -607,7 +607,7 @@
"```{=mdx}\n",
"<ChatModelTabs\n",
" customVarName=\"llm\"\n",
" anthropicParams={`\"model=\"claude-3-sonnet-20240229\", temperature=0.2, max_tokens=1024\"`}\n",
" anthropicParams={`model=\"claude-3-sonnet-20240229\", temperature=0.2, max_tokens=1024`}\n",
"/>\n",
"```\n",
"\n",
@@ -957,7 +957,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
"version": "3.11.5"
}
},
"nbformat": 4,

View File

@@ -337,8 +337,6 @@
}
],
"source": [
"from typing import List\n",
"\n",
"from langchain_core.documents import Document\n",
"from langchain_core.runnables import RunnableLambda\n",
"\n",

View File

@@ -130,7 +130,7 @@
"\n",
"2. `Map-reduce`: Summarize each document on its own in a \"map\" step and then \"reduce\" the summaries into a final summary (see [here](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.combine_documents.map_reduce.MapReduceDocumentsChain.html) for more on the `MapReduceDocumentsChain`, which is used for this method).\n",
"\n",
"Note that map-reduce is especially effective when understanding of a sub-document does not rely on preceeding context. For example, when summarizing a corpus of many, shorter documents. In other cases, such as summarizing a novel or body of text with an inherent sequence, [iterative refinement](/docs/how_to/summarize_refine) may be more effective."
"Note that map-reduce is especially effective when understanding of a sub-document does not rely on preceding context. For example, when summarizing a corpus of many, shorter documents. In other cases, such as summarizing a novel or body of text with an inherent sequence, [iterative refinement](/docs/how_to/summarize_refine) may be more effective."
]
},
{
@@ -346,7 +346,7 @@
"\n",
"Note that the map step is typically parallelized over the input documents.\n",
"\n",
"[LangGraph](https://langchain-ai.github.io/langgraph/), built on top of `langchain-core`, suports [map-reduce](https://langchain-ai.github.io/langgraph/how-tos/map-reduce/) workflows and is well-suited to this problem:\n",
"[LangGraph](https://langchain-ai.github.io/langgraph/), built on top of `langchain-core`, supports [map-reduce](https://langchain-ai.github.io/langgraph/how-tos/map-reduce/) workflows and is well-suited to this problem:\n",
"\n",
"- LangGraph allows for individual steps (such as successive summarizations) to be streamed, allowing for greater control of execution;\n",
"- LangGraph's [checkpointing](https://langchain-ai.github.io/langgraph/how-tos/persistence/) supports error recovery, extending with human-in-the-loop workflows, and easier incorporation into conversational applications.\n",

View File

@@ -194,11 +194,6 @@ const config = {
docId: "contributing/index",
label: "Contributing",
},
{
type: "docSidebar",
sidebarId: "templates",
label: "Templates",
},
{
label: "Cookbooks",
href: "https://github.com/langchain-ai/langchain/blob/master/cookbook/README.md"
@@ -247,14 +242,6 @@ const config = {
href: "https://docs.smith.langchain.com/",
label: "LangSmith Docs",
},
{
href: "https://github.com/langchain-ai/langchain/tree/master/templates",
label: "Templates GitHub",
},
{
label: "Templates Hub",
href: "https://templates.langchain.com",
},
{
href: "https://smith.langchain.com/hub",
label: "LangChain Hub",

View File

@@ -8,16 +8,16 @@ if [ "$VERCEL_ENV" == "production" ] || [ "$VERCEL_GIT_COMMIT_REF" == "master" ]
echo "✅ Production build - proceeding with build"
exit 1;
else
echo "Checking for changes in docs/ and templates/:"
echo "Checking for changes in docs/"
echo "---"
git log -n 50 --pretty=format:"%s" -- . ../templates | grep -v '(#'
git log -n 50 --pretty=format:"%s" -- . | grep -v '(#'
if [ $? -eq 0 ]; then
echo "---"
echo "✅ Changes detected in docs/ or templates/ - proceeding with build"
echo "✅ Changes detected in docs/ - proceeding with build"
exit 1
else
echo "---"
echo "🛑 No changes detected in docs/ or templates/ - ignoring build"
echo "🛑 No changes detected in docs/ - ignoring build"
exit 0
fi
fi

View File

@@ -397,7 +397,7 @@ class ArxivAPIWrapper(BaseModel):
def _format_doc_url(doc_path: str) -> str:
return f"https://{LANGCHAIN_PYTHON_URL}/{doc_path}"
return f"https://{LANGCHAIN_PYTHON_URL}/v0.2/{doc_path}"
def _format_api_ref_url(doc_path: str, compact: bool = False) -> str:
@@ -523,10 +523,9 @@ This page contains `arXiv` papers referenced in the LangChain Documentation, API
Templates, and Cookbooks.
From the opposite direction, scientists use `LangChain` in research and reference it in the research papers.
Here you find papers that reference:
- [LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header)
- [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header)
- [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
`arXiv` papers with references to:
[LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header) | [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header) | [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
## Summary
@@ -564,7 +563,7 @@ Here you find papers that reference:
refs += [
"`Cookbook:` "
+ ", ".join(
f"[{key}]({url})"
f"[{str(key).replace('_', ' ').title()}]({url})"
for key, url in paper.referencing_cookbook2url.items()
)
]
@@ -572,7 +571,7 @@ Here you find papers that reference:
title_link = f"[{paper.title}]({paper.url})"
f.write(
f"| {' | '.join([f'`{paper.arxiv_id}` {title_link}', ', '.join(paper.authors), paper.published_date, refs_str])}\n"
f"| {' | '.join([f'`{paper.arxiv_id}` {title_link}', ', '.join(paper.authors), paper.published_date.replace('-', '&#8209;'), refs_str])}\n"
)
for paper in papers:
@@ -607,9 +606,8 @@ Here you find papers that reference:
f"""
## {paper.title}
- **arXiv id:** [{paper.arxiv_id}]({paper.url}) **Published Date:** {paper.published_date}
- **Title:** {paper.title}
- **Authors:** {', '.join(paper.authors)}
- **arXiv id:** [{paper.arxiv_id}]({paper.url}) **Published Date:** {paper.published_date}
- **LangChain:**
{refs}

View File

@@ -66,19 +66,22 @@ def check_header_order(path: Path) -> None:
with open(path, "r") as f:
doc = f.read()
regex = r".*".join(headers)
if not re.search(regex, doc, re.DOTALL):
issueline = (
(
" Please see https://github.com/langchain-ai/langchain/issues/"
f"{issue_number} for instructions on how to correctly format a "
f"{doc_dir} integration page."
)
if isinstance(issue_number, int)
else ""
)
notfound = []
for header in headers:
index = doc.find(header)
if index == -1:
notfound.append(header)
doc = doc[index + len(header) :]
if notfound:
notfound_headers = "\n- ".join(notfound)
raise ValueError(
f"Document {path} does not match the expected header order.{issueline}"
f"Document {path} is missing headers:"
"\n- "
f"{notfound_headers}"
"\n\n"
"Please see https://github.com/langchain-ai/langchain/issues/"
f"{issue_number} for instructions on how to correctly format a "
f"{doc_dir} integration page."
)

View File

@@ -1,46 +0,0 @@
import glob
import os
import re
import shutil
import sys
from pathlib import Path
if __name__ == "__main__":
intermediate_dir = Path(sys.argv[1])
templates_source_dir = Path(os.path.abspath(__file__)).parents[2] / "templates"
templates_intermediate_dir = intermediate_dir / "templates"
readmes = list(glob.glob(str(templates_source_dir) + "/*/README.md"))
destinations = [
readme[len(str(templates_source_dir)) + 1 : -10] + ".md" for readme in readmes
]
for source, destination in zip(readmes, destinations):
full_destination = templates_intermediate_dir / destination
shutil.copyfile(source, full_destination)
with open(full_destination, "r") as f:
content = f.read()
# remove images
content = re.sub(r"\!\[.*?\]\((.*?)\)", "", content)
with open(full_destination, "w") as f:
f.write(content)
sidebar_hidden = """---
sidebar_class_name: hidden
custom_edit_url:
---
"""
# handle index file
templates_index_source = templates_source_dir / "docs" / "INDEX.md"
templates_index_intermediate = templates_intermediate_dir / "index.md"
with open(templates_index_source, "r") as f:
content = f.read()
# replace relative links
content = re.sub(r"\]\(\.\.\/", "](/docs/templates/", content)
with open(templates_index_intermediate, "w") as f:
f.write(sidebar_hidden + content)

View File

@@ -71,6 +71,7 @@ CODE_INTERPRETER_TOOL_FEAT_TABLE = {
"upload": True,
"return_results": "Text",
"link": "/docs/integrations/tools/bearly",
"self_hosting": False,
},
"Riza Code Interpreter": {
"langauges": "Python, JavaScript, PHP, Ruby",
@@ -78,6 +79,7 @@ CODE_INTERPRETER_TOOL_FEAT_TABLE = {
"upload": False,
"return_results": "Text",
"link": "/docs/integrations/tools/riza",
"self_hosting": True,
},
"E2B Data Analysis": {
"langauges": "Python. In beta: JavaScript, R, Java",
@@ -85,6 +87,7 @@ CODE_INTERPRETER_TOOL_FEAT_TABLE = {
"upload": True,
"return_results": "Text, Images, Videos",
"link": "/docs/integrations/tools/e2b_data_analysis",
"self_hosting": True,
},
"Azure Container Apps dynamic sessions": {
"langauges": "Python",
@@ -92,6 +95,7 @@ CODE_INTERPRETER_TOOL_FEAT_TABLE = {
"upload": True,
"return_results": "Text, Images",
"link": "/docs/integrations/tools/azure_dynamic_sessions",
"self_hosting": False,
},
}
@@ -301,13 +305,14 @@ def get_search_tools_table() -> str:
def get_code_interpreter_table() -> str:
"""Get the table of search tools."""
"""Get the table of code interpreter tools."""
header = [
"tool",
"langauges",
"sandbox_lifetime",
"upload",
"return_results",
"self_hosting",
]
title = [
"Tool/Toolkit",
@@ -315,6 +320,7 @@ def get_code_interpreter_table() -> str:
"Sandbox Lifetime",
"Supports File Uploads",
"Return Types",
"Supports Self-Hosting",
]
rows = [title, [":-"] + [":-:"] * (len(title) - 1)]
for search_tool, feats in sorted(CODE_INTERPRETER_TOOL_FEAT_TABLE.items()):
@@ -324,7 +330,7 @@ def get_code_interpreter_table() -> str:
]
for h in header[1:]:
value = feats.get(h)
if h == "upload":
if h == "upload" or h == "self_hosting":
if value is True:
row.append("")
else:

View File

@@ -383,22 +383,6 @@ module.exports = {
},
},
],
templates: [
{
type: "category",
label: "Templates",
items: [
{
type: "autogenerated",
dirName: "templates",
},
],
link: {
type: "doc",
id: "templates/index",
},
},
],
contributing: [
{
type: "category",
@@ -414,6 +398,7 @@ module.exports = {
{ type: "doc", id: "contributing/documentation/style_guide", className: "hidden" },
{ type: "doc", id: "contributing/documentation/setup", className: "hidden" },
"contributing/testing",
"contributing/review_process",
"contributing/faq",
],
collapsible: false,

View File

@@ -890,7 +890,7 @@ const FEATURE_TABLES = {
{title: "Passes Standard Tests", formatter: (item) => item.passesStandardTests ? "✅" : "❌"},
{title: "Multi Tenancy", formatter: (item) => item.multiTenancy ? "✅" : "❌"},
{title: "IDs in add Documents", formatter: (item) => item.idsInAddDocuments ? "✅" : "❌"},
{title: "Local/Cloud", formatter: (item) => item.local ? "Local" : "Cloud"},
// {title: "Local/Cloud", formatter: (item) => item.local ? "Local" : "Cloud"},
],
items: [
{
@@ -951,7 +951,7 @@ const FEATURE_TABLES = {
deleteById: true,
filtering: true,
searchByVector: true,
searchWithScore: false,
searchWithScore: true,
async: true,
passesStandardTests: false,
multiTenancy: false,
@@ -973,7 +973,7 @@ const FEATURE_TABLES = {
},
{
name: "InMemoryVectorStore",
link: "in_memory",
link: "https://python.langchain.com/v0.2/api_reference/core/vectorstores/langchain_core.vectorstores.in_memory.InMemoryVectorStore.html",
deleteById: true,
filtering: true,
searchByVector: false,
@@ -1012,7 +1012,7 @@ const FEATURE_TABLES = {
},
{
name: "PGVector",
link: "pg_vector",
link: "pgvector",
deleteById: true,
filtering: true,
searchByVector: true,

Binary image file changed (161 KiB); preview not shown.

View File

@@ -109,6 +109,14 @@
{
"source": "/v0.2/docs/integrations/chat/ollama_functions/",
"destination": "https://python.langchain.com/v0.1/docs/integrations/chat/ollama_functions/"
},
{
"source": "/v0.2/docs/templates/:path(.*/?)*",
"destination": "https://github.com/langchain-ai/langchain/tree/master/templates/:path*"
},
{
"source": "/v0.2/docs/integrations/text_embedding/nemo/",
"destination": "/v0.2/docs/integrations/text_embedding/nvidia_ai_endpoints/"
}
]
}

View File

@@ -30,14 +30,13 @@ lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test
lint lint_diff lint_package lint_tests:
poetry run ruff .
poetry run ruff format $(PYTHON_FILES) --diff
poetry run ruff --select I $(PYTHON_FILES)
mkdir -p $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
format format_diff:
poetry run ruff format $(PYTHON_FILES)
poetry run ruff --select I --fix $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)
spell_check:
poetry run codespell --toml pyproject.toml

View File

@@ -48,14 +48,13 @@ lint lint_diff lint_package lint_tests:
./scripts/check_pydantic.sh .
./scripts/lint_imports.sh .
./scripts/check_pickle.sh .
poetry run ruff check .
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
format format_diff:
poetry run ruff format $(PYTHON_FILES)
poetry run ruff check --select I --fix $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)
spell_check:
poetry run codespell --toml pyproject.toml

View File

@@ -16,7 +16,6 @@ cloudpickle>=2.0.0
cohere>=4,<6
databricks-vectorsearch>=0.21,<0.22
datasets>=2.15.0,<3
dedoc>=2.2.6,<3
dgml-utils>=0.3.0,<0.4
elasticsearch>=8.12.0,<9
esprima>=4.0.1,<5
@@ -92,3 +91,4 @@ xata>=1.0.0a7,<2
xmltodict>=0.13.0,<0.14
nanopq==0.2.1
mlflow[genai]>=2.14.0
databricks-sdk>=0.30.0

View File

@@ -91,7 +91,7 @@ def _is_assistants_builtin_tool(
A boolean response of true or false indicating if the tool corresponds to
OpenAI Assistants built-in.
"""
assistants_builtin_tools = ("code_interpreter", "retrieval")
assistants_builtin_tools = ("code_interpreter", "retrieval", "file_search")
return (
isinstance(tool, dict)
and ("type" in tool)

View File

@@ -2279,7 +2279,11 @@ class OpenSearchSemanticCache(BaseCache):
"""Cache that uses OpenSearch vector store backend"""
def __init__(
self, opensearch_url: str, embedding: Embeddings, score_threshold: float = 0.2
self,
opensearch_url: str,
embedding: Embeddings,
score_threshold: float = 0.2,
**kwargs: Any,
):
"""
Args:
@@ -2300,6 +2304,7 @@ class OpenSearchSemanticCache(BaseCache):
self.opensearch_url = opensearch_url
self.embedding = embedding
self.score_threshold = score_threshold
self.connection_kwargs = kwargs
def _index_name(self, llm_string: str) -> str:
hashed_index = _hash(llm_string)
@@ -2317,6 +2322,7 @@ class OpenSearchSemanticCache(BaseCache):
opensearch_url=self.opensearch_url,
index_name=index_name,
embedding_function=self.embedding,
**self.connection_kwargs,
)
# create index for the vectorstore
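To make the intent of the new `**kwargs` pass-through concrete, a hedged usage sketch: the connection options below (`http_auth`, `verify_certs`) are illustrative OpenSearch client settings that would now be forwarded to the underlying `OpenSearchVectorSearch`, and `embeddings` stands for any `Embeddings` implementation.

```python
from langchain_community.cache import OpenSearchSemanticCache
from langchain_core.globals import set_llm_cache

# `embeddings` is assumed to be any Embeddings implementation.
set_llm_cache(
    OpenSearchSemanticCache(
        opensearch_url="https://localhost:9200",
        embedding=embeddings,
        score_threshold=0.2,
        # Illustrative connection kwargs forwarded via the new **kwargs:
        http_auth=("admin", "admin"),
        verify_certs=False,
    )
)
```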

View File

@@ -6,6 +6,7 @@ against a vector database.
import datetime
import inspect
import logging
from importlib.metadata import version
from typing import Any, Dict, List, Optional
from langchain.chains.base import Chain
@@ -27,6 +28,7 @@ from langchain_community.chains.pebblo_retrieval.models import (
App,
AuthContext,
ChainInfo,
Framework,
Model,
SemanticContext,
VectorDB,
@@ -149,6 +151,7 @@ class PebbloRetrievalQA(Chain):
res = indexqa({'query': 'This is my query'})
answer, docs = res['result'], res['source_documents']
"""
prompt_time = datetime.datetime.now().isoformat()
_run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
question = inputs[self.input_key]
auth_context = inputs.get(self.auth_context_key)
@@ -157,7 +160,7 @@ class PebbloRetrievalQA(Chain):
"run_manager" in inspect.signature(self._aget_docs).parameters
)
_, prompt_entities = self.pb_client.check_prompt_validity(question)
_, prompt_entities = await self.pb_client.acheck_prompt_validity(question)
if accepts_run_manager:
docs = await self._aget_docs(
@@ -169,6 +172,18 @@ class PebbloRetrievalQA(Chain):
input_documents=docs, question=question, callbacks=_run_manager.get_child()
)
await self.pb_client.asend_prompt(
self.app_name,
self.retriever,
question,
answer,
auth_context,
docs,
prompt_entities,
prompt_time,
self.enable_prompt_gov,
)
if self.return_source_documents:
return {self.output_key: answer, "source_documents": docs}
else:
@@ -314,6 +329,10 @@ class PebbloRetrievalQA(Chain):
framework=framework,
chains=chains,
plugin_version=PLUGIN_VERSION,
client_version=Framework(
name="langchain_community",
version=version("langchain_community"),
),
)
return app

View File

@@ -123,6 +123,7 @@ class App(BaseModel):
framework: Framework
chains: List[ChainInfo]
plugin_version: str
client_version: Framework
class Context(BaseModel):

View File

@@ -6,6 +6,8 @@ from enum import Enum
from http import HTTPStatus
from typing import Any, Dict, List, Optional, Tuple
import aiohttp
from aiohttp import ClientTimeout
from langchain_core.documents import Document
from langchain_core.env import get_runtime_environment
from langchain_core.pydantic_v1 import BaseModel
@@ -125,7 +127,9 @@ class PebbloRetrievalAPIWrapper(BaseModel):
if self.classifier_location == "local":
# Send app details to local classifier
headers = self._make_headers()
app_discover_url = f"{self.classifier_url}{Routes.retrieval_app_discover}"
app_discover_url = (
f"{self.classifier_url}{Routes.retrieval_app_discover.value}"
)
pebblo_resp = self.make_request("POST", app_discover_url, headers, payload)
if self.api_key:
@@ -138,7 +142,7 @@ class PebbloRetrievalAPIWrapper(BaseModel):
payload.update({"pebblo_server_version": pebblo_server_version})
payload.update({"pebblo_client_version": PLUGIN_VERSION})
pebblo_cloud_url = f"{self.cloud_url}{Routes.retrieval_app_discover}"
pebblo_cloud_url = f"{self.cloud_url}{Routes.retrieval_app_discover.value}"
_ = self.make_request("POST", pebblo_cloud_url, headers, payload)
def send_prompt(
@@ -184,7 +188,7 @@ class PebbloRetrievalAPIWrapper(BaseModel):
if self.classifier_location == "local":
# Send prompt to local classifier
headers = self._make_headers()
prompt_url = f"{self.classifier_url}{Routes.prompt}"
prompt_url = f"{self.classifier_url}{Routes.prompt.value}"
pebblo_resp = self.make_request("POST", prompt_url, headers, payload)
if self.api_key:
@@ -196,12 +200,74 @@ class PebbloRetrievalAPIWrapper(BaseModel):
self.update_cloud_payload(payload, pebblo_resp)
headers = self._make_headers(cloud_request=True)
pebblo_cloud_prompt_url = f"{self.cloud_url}{Routes.prompt}"
pebblo_cloud_prompt_url = f"{self.cloud_url}{Routes.prompt.value}"
_ = self.make_request("POST", pebblo_cloud_prompt_url, headers, payload)
elif self.classifier_location == "pebblo-cloud":
logger.warning("API key is missing for sending prompt to Pebblo cloud.")
raise NameError("API key is missing for sending prompt to Pebblo cloud.")
async def asend_prompt(
self,
app_name: str,
retriever: VectorStoreRetriever,
question: str,
answer: str,
auth_context: Optional[AuthContext],
docs: List[Document],
prompt_entities: Dict[str, Any],
prompt_time: str,
prompt_gov_enabled: bool = False,
) -> None:
"""
Send prompt to Pebblo server for classification.
Then send the prompt to Daxa cloud (if api_key is present).
Args:
app_name (str): Name of the app.
retriever (VectorStoreRetriever): Retriever instance.
question (str): Question asked in the prompt.
answer (str): Answer generated by the model.
auth_context (Optional[AuthContext]): Authentication context.
docs (List[Document]): List of documents retrieved.
prompt_entities (Dict[str, Any]): Entities present in the prompt.
prompt_time (str): Time when the prompt was generated.
prompt_gov_enabled (bool): Whether prompt governance is enabled.
"""
pebblo_resp = None
payload = self.build_prompt_qa_payload(
app_name,
retriever,
question,
answer,
auth_context,
docs,
prompt_entities,
prompt_time,
prompt_gov_enabled,
)
if self.classifier_location == "local":
# Send prompt to local classifier
headers = self._make_headers()
prompt_url = f"{self.classifier_url}{Routes.prompt.value}"
pebblo_resp = await self.amake_request("POST", prompt_url, headers, payload)
if self.api_key:
# Send prompt to Pebblo cloud if api_key is present
if self.classifier_location == "local":
# If classifier location is local, then response, context and prompt
# should be fetched from pebblo_resp and replaced in payload.
self.update_cloud_payload(payload, pebblo_resp)
headers = self._make_headers(cloud_request=True)
pebblo_cloud_prompt_url = f"{self.cloud_url}{Routes.prompt.value}"
_ = await self.amake_request(
"POST", pebblo_cloud_prompt_url, headers, payload
)
elif self.classifier_location == "pebblo-cloud":
logger.warning("API key is missing for sending prompt to Pebblo cloud.")
raise NameError("API key is missing for sending prompt to Pebblo cloud.")
def check_prompt_validity(self, question: str) -> Tuple[bool, Dict[str, Any]]:
"""
Check the validity of the given prompt using a remote classification service.
@@ -222,18 +288,52 @@ class PebbloRetrievalAPIWrapper(BaseModel):
is_valid_prompt: bool = True
if self.classifier_location == "local":
headers = self._make_headers()
prompt_gov_api_url = f"{self.classifier_url}{Routes.prompt_governance}"
prompt_gov_api_url = (
f"{self.classifier_url}{Routes.prompt_governance.value}"
)
pebblo_resp = self.make_request(
"POST", prompt_gov_api_url, headers, prompt_payload
)
if pebblo_resp:
logger.debug(f"pebblo_resp.json() {pebblo_resp.json()}")
prompt_entities["entities"] = pebblo_resp.json().get("entities", {})
prompt_entities["entityCount"] = pebblo_resp.json().get(
"entityCount", 0
)
return is_valid_prompt, prompt_entities
async def acheck_prompt_validity(
self, question: str
) -> Tuple[bool, Dict[str, Any]]:
"""
Check the validity of the given prompt using a remote classification service.
This method sends a prompt to a remote classifier service and returns
whether entities are present in the prompt.
Args:
question (str): The prompt question to be validated.
Returns:
bool: True if the prompt is valid (does not contain deny list entities),
False otherwise.
dict: The entities present in the prompt
"""
prompt_payload = {"prompt": question}
prompt_entities: dict = {"entities": {}, "entityCount": 0}
is_valid_prompt: bool = True
if self.classifier_location == "local":
headers = self._make_headers()
prompt_gov_api_url = (
f"{self.classifier_url}{Routes.prompt_governance.value}"
)
pebblo_resp = await self.amake_request(
"POST", prompt_gov_api_url, headers, prompt_payload
)
if pebblo_resp:
prompt_entities["entities"] = pebblo_resp.get("entities", {})
prompt_entities["entityCount"] = pebblo_resp.get("entityCount", 0)
return is_valid_prompt, prompt_entities
def _make_headers(self, cloud_request: bool = False) -> dict:
"""
Generate headers for the request.
@@ -332,6 +432,56 @@ class PebbloRetrievalAPIWrapper(BaseModel):
payload["prompt"] = {}
payload["context"] = []
@staticmethod
async def amake_request(
method: str,
url: str,
headers: dict,
payload: Optional[dict] = None,
timeout: int = 20,
) -> Any:
"""
Make an async request to the Pebblo server/cloud API.
Args:
method (str): HTTP method (GET, POST, PUT, DELETE, etc.).
url (str): URL for the request.
headers (dict): Headers for the request.
payload (Optional[dict]): Payload for the request (for POST, PUT, etc.).
timeout (int): Timeout for the request in seconds.
Returns:
Any: Response json if the request is successful.
"""
try:
client_timeout = ClientTimeout(total=timeout)
async with aiohttp.ClientSession() as asession:
async with asession.request(
method=method,
url=url,
json=payload,
headers=headers,
timeout=client_timeout,
) as response:
if response.status >= HTTPStatus.INTERNAL_SERVER_ERROR:
logger.warning(f"Pebblo Server: Error {response.status}")
elif response.status >= HTTPStatus.BAD_REQUEST:
logger.warning(
f"Pebblo received an invalid payload: " f"{response.text}"
)
elif response.status != HTTPStatus.OK:
logger.warning(
f"Pebblo returned an unexpected response code: "
f"{response.status}"
)
response_json = await response.json()
return response_json
except RequestException:
logger.warning("Unable to reach server %s", url)
except Exception as e:
logger.warning("An Exception caught in amake_request: %s", e)
return None
def build_prompt_qa_payload(
self,
app_name: str,

View File

@@ -68,7 +68,8 @@ class IMessageChatLoader(BaseChatLoader):
"Please install it with `pip install pysqlite3`"
) from e
def _parse_attributedBody(self, attributedBody: bytes) -> str:
@staticmethod
def _parse_attributed_body(attributed_body: bytes) -> str:
"""
Parse the attributedBody field of the message table
for the text content of the message.
@@ -88,17 +89,18 @@ class IMessageChatLoader(BaseChatLoader):
that byte.
Args:
attributedBody (bytes): attributedBody field of the message table.
attributed_body (bytes): attributedBody field of the message table.
Return:
str: Text content of the message.
"""
content = attributedBody.split(b"NSString")[1][5:]
content = attributed_body.split(b"NSString")[1][5:]
length, start = content[0], 1
if content[0] == 129:
length, start = int.from_bytes(content[1:3], "little"), 3
return content[start : start + length].decode("utf-8", errors="ignore")
def _get_session_query(self, use_chat_handle_table: bool) -> str:
@staticmethod
def _get_session_query(use_chat_handle_table: bool) -> str:
# Messages sent pre OSX 12 require a join through the chat_handle_join table
# However, the table doesn't exist if database created with OSX 12 or above.
@@ -151,7 +153,7 @@ class IMessageChatLoader(BaseChatLoader):
if text:
content = text
elif attributedBody:
content = self._parse_attributedBody(attributedBody)
content = self._parse_attributed_body(attributedBody)
else: # Skip messages with no content
continue

View File

@@ -122,6 +122,9 @@ if TYPE_CHECKING:
from langchain_community.chat_models.mlx import (
ChatMLX,
)
from langchain_community.chat_models.moonshot import (
MoonshotChat,
)
from langchain_community.chat_models.oci_generative_ai import (
ChatOCIGenAI, # noqa: F401
)
@@ -153,6 +156,7 @@ if TYPE_CHECKING:
from langchain_community.chat_models.sparkllm import (
ChatSparkLLM,
)
from langchain_community.chat_models.symblai_nebula import ChatNebula
from langchain_community.chat_models.tongyi import (
ChatTongyi,
)
@@ -201,6 +205,7 @@ __all__ = [
"ChatMLflowAIGateway",
"ChatMaritalk",
"ChatMlflow",
"ChatNebula",
"ChatOCIGenAI",
"ChatOllama",
"ChatOpenAI",
@@ -222,6 +227,7 @@ __all__ = [
"JinaChat",
"LlamaEdgeChatService",
"MiniMaxChat",
"MoonshotChat",
"PaiEasChatEndpoint",
"PromptLayerChatOpenAI",
"QianfanChatEndpoint",
@@ -257,6 +263,7 @@ _module_lookup = {
"ChatMLX": "langchain_community.chat_models.mlx",
"ChatMaritalk": "langchain_community.chat_models.maritalk",
"ChatMlflow": "langchain_community.chat_models.mlflow",
"ChatNebula": "langchain_community.chat_models.symblai_nebula",
"ChatOctoAI": "langchain_community.chat_models.octoai",
"ChatOCIGenAI": "langchain_community.chat_models.oci_generative_ai",
"ChatOllama": "langchain_community.chat_models.ollama",
@@ -277,6 +284,7 @@ _module_lookup = {
"JinaChat": "langchain_community.chat_models.jinachat",
"LlamaEdgeChatService": "langchain_community.chat_models.llama_edge",
"MiniMaxChat": "langchain_community.chat_models.minimax",
"MoonshotChat": "langchain_community.chat_models.moonshot",
"PaiEasChatEndpoint": "langchain_community.chat_models.pai_eas_endpoint",
"PromptLayerChatOpenAI": "langchain_community.chat_models.promptlayer_openai",
"SolarChat": "langchain_community.chat_models.solar",

View File

@@ -222,6 +222,11 @@ class ChatDeepInfra(BaseChatModel):
streaming: bool = False
max_retries: int = 1
class Config:
"""Configuration for this pydantic object."""
allow_population_by_field_name = True
@property
def _default_params(self) -> Dict[str, Any]:
"""Get the default parameters for calling OpenAI API."""

View File

@@ -268,6 +268,7 @@ class GigaChat(_BaseGigaChat, BaseChatModel):
dict(finish_reason=finish_reason) if finish_reason is not None else None
)
yield ChatGenerationChunk(message=chunk, generation_info=generation_info)
if run_manager:
await run_manager.on_llm_new_token(content)
yield ChatGenerationChunk(message=chunk, generation_info=generation_info)

View File

@@ -16,6 +16,7 @@ from langchain_core.messages import (
ChatMessageChunk,
HumanMessage,
HumanMessageChunk,
SystemMessage,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
@@ -33,6 +34,8 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
message_dict: Dict[str, Any]
if isinstance(message, ChatMessage):
message_dict = {"Role": message.role, "Content": message.content}
elif isinstance(message, SystemMessage):
message_dict = {"Role": "system", "Content": message.content}
elif isinstance(message, HumanMessage):
message_dict = {"Role": "user", "Content": message.content}
elif isinstance(message, AIMessage):
@@ -45,7 +48,9 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
role = _dict["Role"]
if role == "user":
if role == "system":
return SystemMessage(content=_dict.get("Content", "") or "")
elif role == "user":
return HumanMessage(content=_dict["Content"])
elif role == "assistant":
return AIMessage(content=_dict.get("Content", "") or "")
@@ -73,6 +78,7 @@ def _create_chat_result(response: Mapping[str, Any]) -> ChatResult:
generations = []
for choice in response["Choices"]:
message = _convert_dict_to_message(choice["Message"])
message.id = response.get("Id", "")
generations.append(ChatGeneration(message=message))
token_usage = response["Usage"]
@@ -115,7 +121,7 @@ class ChatHunyuan(BaseChatModel):
model: str = "hunyuan-lite"
"""What Model to use.
Optional model:
- hunyuan-lite
- hunyuan-lite
- hunyuan-standard
- hunyuan-standard-256K
- hunyuan-pro
@@ -233,6 +239,7 @@ class ChatHunyuan(BaseChatModel):
chunk = _convert_delta_to_message_chunk(
choice["Delta"], default_chunk_class
)
chunk.id = response.get("Id", "")
default_chunk_class = chunk.__class__
cg_chunk = ChatGenerationChunk(message=chunk)
if run_manager:
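A short, hypothetical usage sketch of the system-message support added above; it assumes the Hunyuan credentials are already configured via the usual environment variables.

```python
from langchain_community.chat_models import ChatHunyuan
from langchain_core.messages import HumanMessage, SystemMessage

chat = ChatHunyuan()  # assumes Hunyuan credentials are set in the environment
chat.invoke(
    [
        SystemMessage(content="You are a terse assistant."),  # now mapped to Role: "system"
        HumanMessage(content="What's the weather like in Shanghai?"),
    ]
)
```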

View File

@@ -33,7 +33,11 @@ class MoonshotChat(MoonshotCommon, ChatOpenAI): # type: ignore[misc]
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that the environment is set up correctly."""
values["moonshot_api_key"] = convert_to_secret_str(
get_from_dict_or_env(values, "moonshot_api_key", "MOONSHOT_API_KEY")
get_from_dict_or_env(
values,
["moonshot_api_key", "api_key", "openai_api_key"],
"MOONSHOT_API_KEY",
)
)
try:
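A hedged sketch of what the widened key lookup above enables; the key value is a placeholder.

```python
from langchain_community.chat_models.moonshot import MoonshotChat

# Any of these spellings should now resolve the API key
# (or simply export MOONSHOT_API_KEY in the environment):
chat = MoonshotChat(api_key="sk-...")
# chat = MoonshotChat(moonshot_api_key="sk-...")
# chat = MoonshotChat(openai_api_key="sk-...")
```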

View File

@@ -32,8 +32,8 @@ class ChatOctoAI(ChatOpenAI):
"""
octoai_api_base: str = Field(default=DEFAULT_API_BASE)
octoai_api_token: SecretStr = Field(default=None)
model_name: str = Field(default=DEFAULT_MODEL)
octoai_api_token: SecretStr = Field(default=None, alias="api_key")
model_name: str = Field(default=DEFAULT_MODEL, alias="model")
@property
def _llm_type(self) -> str:

View File

@@ -271,13 +271,22 @@ class ChatPremAI(BaseChatModel, BaseModel):
If model name is other than default model then it will override the calls
from the model deployed from launchpad."""
temperature: Optional[float] = None
session_id: Optional[str] = None
"""The ID of the session to use. It helps to track the chat history."""
temperature: Optional[float] = Field(default=None)
"""Model temperature. Value should be >= 0 and <= 1.0"""
max_tokens: Optional[int] = None
top_p: Optional[float] = None
"""top_p adjusts the number of choices for each predicted tokens based on
cumulative probabilities. Value should be ranging between 0.0 and 1.0.
"""
max_tokens: Optional[int] = Field(default=None)
"""The maximum number of tokens to generate"""
max_retries: int = 1
max_retries: int = Field(default=1)
"""Max number of retries to call the API"""
system_prompt: Optional[str] = ""

View File

@@ -8,7 +8,7 @@ import threading
from datetime import datetime
from queue import Queue
from time import mktime
from typing import Any, Dict, Generator, Iterator, List, Mapping, Optional, Type
from typing import Any, Dict, Generator, Iterator, List, Mapping, Optional, Type, cast
from urllib.parse import urlencode, urlparse, urlunparse
from wsgiref.handlers import format_date_time
@@ -26,9 +26,15 @@ from langchain_core.messages import (
BaseMessageChunk,
ChatMessage,
ChatMessageChunk,
FunctionMessageChunk,
HumanMessage,
HumanMessageChunk,
SystemMessage,
ToolMessageChunk,
)
from langchain_core.output_parsers.openai_tools import (
make_invalid_tool_call,
parse_tool_call,
)
from langchain_core.outputs import (
ChatGeneration,
@@ -48,13 +54,24 @@ SPARK_API_URL = "wss://spark-api.xf-yun.com/v3.5/chat"
SPARK_LLM_DOMAIN = "generalv3.5"
def _convert_message_to_dict(message: BaseMessage) -> dict:
def convert_message_to_dict(message: BaseMessage) -> dict:
message_dict: Dict[str, Any]
if isinstance(message, ChatMessage):
message_dict = {"role": "user", "content": message.content}
elif isinstance(message, HumanMessage):
message_dict = {"role": "user", "content": message.content}
elif isinstance(message, AIMessage):
message_dict = {"role": "assistant", "content": message.content}
if "function_call" in message.additional_kwargs:
message_dict["function_call"] = message.additional_kwargs["function_call"]
# If function call only, content is None not empty string
if message_dict["content"] == "":
message_dict["content"] = None
if "tool_calls" in message.additional_kwargs:
message_dict["tool_calls"] = message.additional_kwargs["tool_calls"]
# If tool calls only, content is None not empty string
if message_dict["content"] == "":
message_dict["content"] = None
elif isinstance(message, SystemMessage):
message_dict = {"role": "system", "content": message.content}
else:
@@ -63,14 +80,35 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
return message_dict
def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
def convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
msg_role = _dict["role"]
msg_content = _dict["content"]
if msg_role == "user":
return HumanMessage(content=msg_content)
elif msg_role == "assistant":
invalid_tool_calls = []
additional_kwargs: Dict = {}
if function_call := _dict.get("function_call"):
additional_kwargs["function_call"] = dict(function_call)
tool_calls = []
if raw_tool_calls := _dict.get("tool_calls"):
additional_kwargs["tool_calls"] = raw_tool_calls
for raw_tool_call in _dict["tool_calls"]:
try:
tool_calls.append(parse_tool_call(raw_tool_call, return_id=True))
except Exception as e:
invalid_tool_calls.append(
make_invalid_tool_call(raw_tool_call, str(e))
)
else:
additional_kwargs = {}
content = msg_content or ""
return AIMessage(content=content)
return AIMessage(
content=content,
additional_kwargs=additional_kwargs,
tool_calls=tool_calls,
invalid_tool_calls=invalid_tool_calls,
)
elif msg_role == "system":
return SystemMessage(content=msg_content)
else:
@@ -80,12 +118,24 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
def _convert_delta_to_message_chunk(
_dict: Mapping[str, Any], default_class: Type[BaseMessageChunk]
) -> BaseMessageChunk:
msg_role = _dict["role"]
msg_content = _dict.get("content", "")
msg_role = cast(str, _dict.get("role"))
msg_content = cast(str, _dict.get("content") or "")
additional_kwargs: Dict = {}
if _dict.get("function_call"):
function_call = dict(_dict["function_call"])
if "name" in function_call and function_call["name"] is None:
function_call["name"] = ""
additional_kwargs["function_call"] = function_call
if _dict.get("tool_calls"):
additional_kwargs["tool_calls"] = _dict["tool_calls"]
if msg_role == "user" or default_class == HumanMessageChunk:
return HumanMessageChunk(content=msg_content)
elif msg_role == "assistant" or default_class == AIMessageChunk:
return AIMessageChunk(content=msg_content)
return AIMessageChunk(content=msg_content, additional_kwargs=additional_kwargs)
elif msg_role == "function" or default_class == FunctionMessageChunk:
return FunctionMessageChunk(content=msg_content, name=_dict["name"])
elif msg_role == "tool" or default_class == ToolMessageChunk:
return ToolMessageChunk(content=msg_content, tool_call_id=_dict["tool_call_id"])
elif msg_role or default_class == ChatMessageChunk:
return ChatMessageChunk(content=msg_content, role=msg_role)
else:
@@ -335,7 +385,7 @@ class ChatSparkLLM(BaseChatModel):
default_chunk_class = AIMessageChunk
self.client.arun(
[_convert_message_to_dict(m) for m in messages],
[convert_message_to_dict(m) for m in messages],
self.spark_user_id,
self.model_kwargs,
streaming=True,
@@ -365,7 +415,7 @@ class ChatSparkLLM(BaseChatModel):
return generate_from_stream(stream_iter)
self.client.arun(
[_convert_message_to_dict(m) for m in messages],
[convert_message_to_dict(m) for m in messages],
self.spark_user_id,
self.model_kwargs,
False,
@@ -378,7 +428,7 @@ class ChatSparkLLM(BaseChatModel):
if "data" not in content:
continue
completion = content["data"]
message = _convert_dict_to_message(completion)
message = convert_dict_to_message(completion)
generations = [ChatGeneration(message=message)]
return ChatResult(generations=generations, llm_output=llm_output)

View File

@@ -0,0 +1,271 @@
import json
import os
from json import JSONDecodeError
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional
import requests
from aiohttp import ClientSession
from langchain_core.callbacks import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
)
from langchain_core.language_models.chat_models import (
BaseChatModel,
agenerate_from_stream,
generate_from_stream,
)
from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.utils import convert_to_secret_str
def _convert_role(role: str) -> str:
map = {"ai": "assistant", "human": "human", "chat": "human"}
if role in map:
return map[role]
else:
raise ValueError(f"Unknown role type: {role}")
def _format_nebula_messages(messages: List[BaseMessage]) -> Dict[str, Any]:
system = ""
formatted_messages = []
for message in messages[:-1]:
if message.type == "system":
if isinstance(message.content, str):
system = message.content
else:
raise ValueError("System prompt must be a string")
else:
formatted_messages.append(
{
"role": _convert_role(message.type),
"text": message.content,
}
)
text = messages[-1].content
formatted_messages.append({"role": "human", "text": text})
return {"system_prompt": system, "messages": formatted_messages}
class ChatNebula(BaseChatModel):
"""`Nebula` chat large language model - https://docs.symbl.ai/docs/nebula-llm
API Reference: https://docs.symbl.ai/reference/nebula-chat
To use, set the environment variable ``NEBULA_API_KEY``,
or pass it as a named parameter to the constructor.
To request an API key, visit https://platform.symbl.ai/#/login
Example:
.. code-block:: python
from langchain_community.chat_models import ChatNebula
from langchain_core.messages import SystemMessage, HumanMessage
chat = ChatNebula(max_new_tokens=1024, temperature=0.5)
messages = [
SystemMessage(
content="You are a helpful assistant."
),
HumanMessage(
"Answer the following question. How can I help save the world."
),
]
chat.invoke(messages)
"""
max_new_tokens: int = 1024
"""Denotes the number of tokens to predict per generation."""
temperature: Optional[float] = 0
"""A non-negative float that tunes the degree of randomness in generation."""
streaming: bool = False
nebula_api_url: str = "https://api-nebula.symbl.ai"
nebula_api_key: Optional[SecretStr] = Field(None, description="Nebula API Token")
class Config:
"""Configuration for this pydantic object."""
allow_population_by_field_name = True
arbitrary_types_allowed = True
def __init__(self, **kwargs: Any) -> None:
if "nebula_api_key" in kwargs:
api_key = convert_to_secret_str(kwargs.pop("nebula_api_key"))
elif "NEBULA_API_KEY" in os.environ:
api_key = convert_to_secret_str(os.environ["NEBULA_API_KEY"])
else:
api_key = None
super().__init__(nebula_api_key=api_key, **kwargs) # type: ignore[call-arg]
@property
def _llm_type(self) -> str:
"""Return type of chat model."""
return "nebula-chat"
@property
def _api_key(self) -> str:
if self.nebula_api_key:
return self.nebula_api_key.get_secret_value()
return ""
def _stream(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[ChatGenerationChunk]:
"""Call out to Nebula's chat endpoint."""
url = f"{self.nebula_api_url}/v1/model/chat/streaming"
headers = {
"ApiKey": self._api_key,
"Content-Type": "application/json",
}
formatted_data = _format_nebula_messages(messages=messages)
payload: Dict[str, Any] = {
"max_new_tokens": self.max_new_tokens,
"temperature": self.temperature,
**formatted_data,
**kwargs,
}
payload = {k: v for k, v in payload.items() if v is not None}
json_payload = json.dumps(payload)
response = requests.request(
"POST", url, headers=headers, data=json_payload, stream=True
)
response.raise_for_status()
for chunk_response in response.iter_lines():
chunk_decoded = chunk_response.decode()[6:]
try:
chunk = json.loads(chunk_decoded)
except JSONDecodeError:
continue
token = chunk["delta"]
cg_chunk = ChatGenerationChunk(message=AIMessageChunk(content=token))
if run_manager:
run_manager.on_llm_new_token(token, chunk=cg_chunk)
yield cg_chunk
async def _astream(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> AsyncIterator[ChatGenerationChunk]:
url = f"{self.nebula_api_url}/v1/model/chat/streaming"
headers = {"ApiKey": self._api_key, "Content-Type": "application/json"}
formatted_data = _format_nebula_messages(messages=messages)
payload: Dict[str, Any] = {
"max_new_tokens": self.max_new_tokens,
"temperature": self.temperature,
**formatted_data,
**kwargs,
}
payload = {k: v for k, v in payload.items() if v is not None}
json_payload = json.dumps(payload)
async with ClientSession() as session:
async with session.post( # type: ignore[call-arg]
url, data=json_payload, headers=headers, stream=True
) as response:
response.raise_for_status()
async for chunk_response in response.content:
chunk_decoded = chunk_response.decode()[6:]
try:
chunk = json.loads(chunk_decoded)
except JSONDecodeError:
continue
token = chunk["delta"]
cg_chunk = ChatGenerationChunk(
message=AIMessageChunk(content=token)
)
if run_manager:
await run_manager.on_llm_new_token(token, chunk=cg_chunk)
yield cg_chunk
def _generate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
if self.streaming:
stream_iter = self._stream(
messages, stop=stop, run_manager=run_manager, **kwargs
)
return generate_from_stream(stream_iter)
url = f"{self.nebula_api_url}/v1/model/chat"
headers = {"ApiKey": self._api_key, "Content-Type": "application/json"}
formatted_data = _format_nebula_messages(messages=messages)
payload: Dict[str, Any] = {
"max_new_tokens": self.max_new_tokens,
"temperature": self.temperature,
**formatted_data,
**kwargs,
}
payload = {k: v for k, v in payload.items() if v is not None}
json_payload = json.dumps(payload)
response = requests.request("POST", url, headers=headers, data=json_payload)
response.raise_for_status()
data = response.json()
return ChatResult(
generations=[ChatGeneration(message=AIMessage(content=data["messages"]))],
llm_output=data,
)
async def _agenerate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
if self.streaming:
stream_iter = self._astream(
messages, stop=stop, run_manager=run_manager, **kwargs
)
return await agenerate_from_stream(stream_iter)
url = f"{self.nebula_api_url}/v1/model/chat"
headers = {"ApiKey": self._api_key, "Content-Type": "application/json"}
formatted_data = _format_nebula_messages(messages=messages)
payload: Dict[str, Any] = {
"max_new_tokens": self.max_new_tokens,
"temperature": self.temperature,
**formatted_data,
**kwargs,
}
payload = {k: v for k, v in payload.items() if v is not None}
json_payload = json.dumps(payload)
async with ClientSession() as session:
async with session.post(
url, data=json_payload, headers=headers
) as response:
response.raise_for_status()
data = await response.json()
return ChatResult(
generations=[
ChatGeneration(message=AIMessage(content=data["messages"]))
],
llm_output=data,
)

View File

@@ -170,7 +170,7 @@ def _make_request(
messages=[Message(**message) for message in message_history],
)
stub = TextGenerationServiceStub(channel)
res = stub.Completion(request, metadata=self._grpc_metadata)
res = stub.Completion(request, metadata=self.grpc_metadata)
return list(res)[0].alternatives[0].message.text
@@ -229,7 +229,7 @@ async def _amake_request(self: ChatYandexGPT, messages: List[BaseMessage]) -> st
messages=[Message(**message) for message in message_history],
)
stub = TextGenerationAsyncServiceStub(channel)
operation = await stub.Completion(request, metadata=self._grpc_metadata)
operation = await stub.Completion(request, metadata=self.grpc_metadata)
async with grpc.aio.secure_channel(
operation_api_url, channel_credentials
) as operation_channel:
@@ -239,7 +239,7 @@ async def _amake_request(self: ChatYandexGPT, messages: List[BaseMessage]) -> st
operation_request = GetOperationRequest(operation_id=operation.id)
operation = await operation_stub.Get(
operation_request,
metadata=self._grpc_metadata,
metadata=self.grpc_metadata,
)
completion_response = CompletionResponse()

View File

@@ -93,7 +93,9 @@ class ChatYuan2(BaseChatModel):
)
"""Base URL path for API requests, an OpenAI compatible API server."""
request_timeout: Optional[Union[float, Tuple[float, float]]] = None
request_timeout: Optional[Union[float, Tuple[float, float]]] = Field(
default=None, alias="timeout"
)
"""Timeout for requests to yuan2 completion API. Default is 600 seconds."""
max_retries: int = 6
@@ -111,7 +113,7 @@ class ChatYuan2(BaseChatModel):
top_p: Optional[float] = 0.9
"""The top-p value to use for sampling."""
stop: Optional[List[str]] = ["<eod>"]
stop: Optional[List[str]] = Field(default=["<eod>"], alias="stop_sequences")
"""A list of strings to stop generation when encountered."""
repeat_last_n: Optional[int] = 64

View File

@@ -104,6 +104,8 @@ class CSVLoader(BaseLoader):
csv_args: Optional[Dict] = None,
encoding: Optional[str] = None,
autodetect_encoding: bool = False,
*,
content_columns: Sequence[str] = (),
):
"""
@@ -116,6 +118,8 @@ class CSVLoader(BaseLoader):
Optional. Defaults to None.
encoding: The encoding of the CSV file. Optional. Defaults to None.
autodetect_encoding: Whether to try to autodetect the file encoding.
content_columns: A sequence of column names to use for the document content.
If not present, use all columns that are not part of the metadata.
"""
self.file_path = file_path
self.source_column = source_column
@@ -123,6 +127,7 @@ class CSVLoader(BaseLoader):
self.encoding = encoding
self.csv_args = csv_args or {}
self.autodetect_encoding = autodetect_encoding
self.content_columns = content_columns
def lazy_load(self) -> Iterator[Document]:
try:
@@ -163,7 +168,11 @@ class CSVLoader(BaseLoader):
if isinstance(v, str) else ','.join(map(str.strip, v))
if isinstance(v, list) else v}"""
for k, v in row.items()
if k not in self.metadata_columns
if (
k in self.content_columns
if self.content_columns
else k not in self.metadata_columns
)
)
metadata = {"source": source, "row": i}
for col in self.metadata_columns:
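A hedged usage sketch of the new `content_columns` keyword; the file name and column names are invented for illustration.

```python
from langchain_community.document_loaders import CSVLoader

loader = CSVLoader(
    file_path="reviews.csv",
    content_columns=("title", "body"),  # only these columns form page_content
    metadata_columns=["rating"],        # these still go into document metadata
)
for doc in loader.lazy_load():
    print(doc.page_content, doc.metadata)
```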

Some files were not shown because too many files have changed in this diff.