Compare commits


1 Commit

Author SHA1 Message Date
Erick Friis
dcdb1568f0 cohere: multi tool integration test 2024-03-27 19:11:22 -07:00
281 changed files with 7655 additions and 25535 deletions

View File

@@ -1,50 +0,0 @@
name: test_doc_imports
on:
workflow_call:
env:
POETRY_VERSION: "1.7.1"
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.11"
name: "check doc imports #${{ matrix.python-version }}"
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
cache-key: core
- name: Install dependencies
shell: bash
run: poetry install --with test
- name: Install langchain editable
run: |
poetry run pip install -e libs/core libs/langchain libs/community libs/experimental
- name: Check doc imports
shell: bash
run: |
poetry run python docs/scripts/check_imports.py
- name: Ensure the test did not create any additional files
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'
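The deleted workflow's "Check doc imports" step calls `docs/scripts/check_imports.py`. For orientation, an import check of this kind can be approximated in a few lines of Python; this is a hedged sketch, not the repository's actual script:

```python
# Hedged sketch of an import checker in the spirit of docs/scripts/check_imports.py
# (the real script may differ): load every file passed on the command line and
# exit non-zero if any of them raises on import.
import importlib.util
import sys
import traceback

if __name__ == "__main__":
    failed = False
    for path in sys.argv[1:]:
        try:
            spec = importlib.util.spec_from_file_location("checked_module", path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
        except Exception:
            failed = True
            print(f"Import failure in {path}:")
            traceback.print_exc()
    sys.exit(1 if failed else 0)
```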

View File

@@ -22,3 +22,7 @@ jobs:
- name: Check broken links
run: yarn check-broken-links
working-directory: ./docs
- name: Check broken links for .mdx files
uses: gaurav-nelson/github-action-markdown-link-check@v1
with:
file-extension: '.mdx'

View File

@@ -60,12 +60,6 @@ jobs:
working-directory: ${{ matrix.working-directory }}
secrets: inherit
test_doc_imports:
needs: [ build ]
if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
uses: ./.github/workflows/_test_doc_imports.yml
secrets: inherit
compile-integration-tests:
name: cd ${{ matrix.working-directory }}
needs: [ build ]

View File

@@ -1,56 +1,44 @@
.PHONY: all clean help docs_build docs_clean docs_linkcheck api_docs_build api_docs_clean api_docs_linkcheck spell_check spell_fix lint lint_package lint_tests format format_diff
.PHONY: all clean docs_build docs_clean docs_linkcheck api_docs_build api_docs_clean api_docs_linkcheck
## help: Show this help info.
help: Makefile
@printf "\n\033[1mUsage: make <TARGETS> ...\033[0m\n\n\033[1mTargets:\033[0m\n\n"
@sed -n 's/^##//p' $< | awk -F':' '{printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' | sort | sed -e 's/^/ /'
## all: Default target, shows help.
# Default target executed when no arguments are given to make.
all: help
## clean: Clean documentation and API documentation artifacts.
clean: docs_clean api_docs_clean
######################
# DOCUMENTATION
######################
## docs_build: Build the documentation.
clean: docs_clean api_docs_clean
docs_build:
docs/.local_build.sh
## docs_clean: Clean the documentation build artifacts.
docs_clean:
@if [ -d _dist ]; then \
rm -r _dist; \
echo "Directory _dist has been cleaned."; \
rm -r _dist; \
echo "Directory _dist has been cleaned."; \
else \
echo "Nothing to clean."; \
echo "Nothing to clean."; \
fi
## docs_linkcheck: Run linkchecker on the documentation.
docs_linkcheck:
poetry run linkchecker _dist/docs/ --ignore-url node_modules
## api_docs_build: Build the API Reference documentation.
api_docs_build:
poetry run python docs/api_reference/create_api_rst.py
cd docs/api_reference && poetry run make html
## api_docs_clean: Clean the API Reference documentation build artifacts.
api_docs_clean:
find ./docs/api_reference -name '*_api_reference.rst' -delete
cd docs/api_reference && poetry run make clean
## api_docs_linkcheck: Run linkchecker on the API Reference documentation.
api_docs_linkcheck:
poetry run linkchecker docs/api_reference/_build/html/index.html
## spell_check: Run codespell on the project.
spell_check:
poetry run codespell --toml pyproject.toml
## spell_fix: Run codespell on the project and fix the errors.
spell_fix:
poetry run codespell --toml pyproject.toml -w
@@ -58,14 +46,31 @@ spell_fix:
# LINTING AND FORMATTING
######################
## lint: Run linting on the project.
lint lint_package lint_tests:
poetry run ruff docs templates cookbook
poetry run ruff format docs templates cookbook --diff
poetry run ruff --select I docs templates cookbook
git grep 'from langchain import' docs/docs templates cookbook | grep -vE 'from langchain import (hub)' && exit 1 || exit 0
## format: Format the project files.
format format_diff:
poetry run ruff format docs templates cookbook
poetry run ruff --select I --fix docs templates cookbook
######################
# HELP
######################
help:
@echo '===================='
@echo '-- DOCUMENTATION --'
@echo 'clean - run docs_clean and api_docs_clean'
@echo 'docs_build - build the documentation'
@echo 'docs_clean - clean the documentation build artifacts'
@echo 'docs_linkcheck - run linkchecker on the documentation'
@echo 'api_docs_build - build the API Reference documentation'
@echo 'api_docs_clean - clean the API Reference documentation build artifacts'
@echo 'api_docs_linkcheck - run linkchecker on the API Reference documentation'
@echo 'spell_check - run codespell on the project'
@echo 'spell_fix - run codespell on the project and fix the errors'
@echo '-- TEST and LINT tasks are within libs/*/ per-package --'
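The self-documenting `help` target on the old side of this hunk extracts `## target: description` comments with sed and awk. For readers less fluent in awk, a rough Python equivalent of that extraction logic (a sketch under the same comment convention):

```python
# Sketch of the Makefile's sed/awk pipeline: scan for "## target: description"
# lines and print a sorted, aligned target list.
import re

def print_help(makefile_path: str = "Makefile") -> None:
    pattern = re.compile(r"^## ([\w-]+): (.*)$")
    with open(makefile_path) as f:
        entries = sorted(m.groups() for line in f if (m := pattern.match(line)))
    for target, description in entries:
        print(f"  {target:<30} {description}")

print_help()
```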

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -45,7 +45,7 @@
}
],
"source": [
"llm_symbolic_math.invoke(\"What is the derivative of sin(x)*exp(x) with respect to x?\")"
"llm_symbolic_math.run(\"What is the derivative of sin(x)*exp(x) with respect to x?\")"
]
},
{
@@ -65,7 +65,7 @@
}
],
"source": [
"llm_symbolic_math.invoke(\n",
"llm_symbolic_math.run(\n",
" \"What is the integral of exp(x)*sin(x) + exp(x)*cos(x) with respect to x?\"\n",
")"
]
@@ -94,7 +94,7 @@
}
],
"source": [
"llm_symbolic_math.invoke('Solve the differential equation y\" - y = e^t')"
"llm_symbolic_math.run('Solve the differential equation y\" - y = e^t')"
]
},
{
@@ -114,7 +114,7 @@
}
],
"source": [
"llm_symbolic_math.invoke(\"What are the solutions to this equation y^3 + 1/3y?\")"
"llm_symbolic_math.run(\"What are the solutions to this equation y^3 + 1/3y?\")"
]
},
{
@@ -134,7 +134,7 @@
}
],
"source": [
"llm_symbolic_math.invoke(\"x = y + 5, y = z - 3, z = x * y. Solve for x, y, z\")"
"llm_symbolic_math.run(\"x = y + 5, y = z - 3, z = x * y. Solve for x, y, z\")"
]
}
],
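These hunks swap `invoke` for `run` throughout the notebook. In LangChain, `run` is the legacy `Chain` convenience method while `invoke` is the newer Runnable-interface entry point; for a single-input chain like this one, both accept a plain string. A sketch of the two call styles, assuming the notebook's `llm_symbolic_math` chain is already constructed:

```python
# Same question through both entry points; `invoke` is the Runnable-style
# method, `run` the legacy Chain convenience method.
question = "What is the derivative of sin(x)*exp(x) with respect to x?"
answer_new = llm_symbolic_math.invoke(question)  # newer interface
answer_old = llm_symbolic_math.run(question)     # legacy interface
```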

File diff suppressed because one or more lines are too long

View File

@@ -1,174 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Video Captioning\n",
"This notebook shows how to use VideoCaptioningChain, which is implemented using Langchain's ImageCaptionLoader and AssemblyAI to produce .srt files.\n",
"\n",
"This system autogenerates both subtitles and closed captions from a video URL."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Installing Dependencies"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# !pip install ffmpeg-python\n",
"# !pip install assemblyai\n",
"# !pip install opencv-python\n",
"# !pip install torch\n",
"# !pip install pillow\n",
"# !pip install transformers\n",
"# !pip install langchain"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T03:39:14.078232Z",
"start_time": "2023-11-30T03:39:12.534410Z"
}
},
"outputs": [],
"source": [
"import getpass\n",
"\n",
"from langchain.chains.video_captioning import VideoCaptioningChain\n",
"from langchain.chat_models.openai import ChatOpenAI"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setting up API Keys"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T03:39:17.423806Z",
"start_time": "2023-11-30T03:39:17.417945Z"
}
},
"outputs": [],
"source": [
"OPENAI_API_KEY = getpass.getpass(\"OpenAI API Key:\")\n",
"\n",
"ASSEMBLYAI_API_KEY = getpass.getpass(\"AssemblyAI API Key:\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Required parameters:**\n",
"\n",
"* llm: The language model this chain will use to get suggestions on how to refine the closed-captions\n",
"* assemblyai_key: The API key for AssemblyAI, used to generate the subtitles\n",
"\n",
"**Optional Parameters:**\n",
"\n",
"* verbose (Default: True): Sets verbose mode for downstream chain calls\n",
"* use_logging (Default: True): Log the chain's processes in run manager\n",
"* frame_skip (Default: None): Choose how many video frames to skip during processing. Increasing it results in faster execution, but less accurate results. If None, frame skip is calculated manually based on the framerate Set this to 0 to sample all frames\n",
"* image_delta_threshold (Default: 3000000): Set the sensitivity for what the image processor considers a change in scenery in the video, used to delimit closed captions. Higher = less sensitive\n",
"* closed_caption_char_limit (Default: 20): Sets the character limit on closed captions\n",
"* closed_caption_similarity_threshold (Default: 80): Sets the percentage value to how similar two closed caption models should be in order to be clustered into one longer closed caption\n",
"* use_unclustered_video_models (Default: False): If true, closed captions that could not be clustered will be included. May result in spontaneous behaviour from closed captions such as very short lasting captions or fast-changing captions. Enabling this is experimental and not recommended"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# https://ia804703.us.archive.org/27/items/uh-oh-here-we-go-again/Uh-Oh%2C%20Here%20we%20go%20again.mp4\n",
"# https://ia601200.us.archive.org/9/items/f58703d4-61e6-4f8f-8c08-b42c7e16f7cb/f58703d4-61e6-4f8f-8c08-b42c7e16f7cb.mp4\n",
"\n",
"chain = VideoCaptioningChain(\n",
" llm=ChatOpenAI(model=\"gpt-4\", max_tokens=4000, openai_api_key=OPENAI_API_KEY),\n",
" assemblyai_key=ASSEMBLYAI_API_KEY,\n",
")\n",
"\n",
"srt_content = chain.run(\n",
" video_file_path=\"https://ia601200.us.archive.org/9/items/f58703d4-61e6-4f8f-8c08-b42c7e16f7cb/f58703d4-61e6-4f8f-8c08-b42c7e16f7cb.mp4\"\n",
")\n",
"\n",
"print(srt_content)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Writing output to .srt file"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"with open(\"output.srt\", \"w\") as file:\n",
" file.write(srt_content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "myenv",
"language": "python",
"name": "myenv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
},
"vscode": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
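The parameter list in this deleted notebook maps directly onto the chain constructor. A hypothetical configuration touching the optional knobs (values are illustrative only, and assume the notebook's API-key setup above):

```python
# Hypothetical tuning of the optional parameters documented above; the
# values here are placeholders, not recommendations.
chain = VideoCaptioningChain(
    llm=ChatOpenAI(model="gpt-4", max_tokens=4000, openai_api_key=OPENAI_API_KEY),
    assemblyai_key=ASSEMBLYAI_API_KEY,
    verbose=True,
    frame_skip=2,  # sample every other frame for speed
    image_delta_threshold=3_000_000,
    closed_caption_char_limit=20,
)
```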

View File

@@ -4,14 +4,14 @@
# ATTENTION: When adding a service below use a non-standard port
# increment by one from the preceding port.
# For credentials always use `langchain` and `langchain` for the
# username and password.
# username and password.
version: "3"
name: langchain-tests
services:
redis:
image: redis/redis-stack-server:latest
# We use non standard ports since
# We use non standard ports since
# these instances are used for testing
# and users may already have existing
# redis instances set up locally
@@ -73,11 +73,6 @@ services:
retries: 60
volumes:
- postgres_data_pgvector:/var/lib/postgresql/data
vdms:
image: intellabs/vdms:latest
container_name: vdms_container
ports:
- "6025:55555"
volumes:
postgres_data:
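Per the comments at the top of this compose file, every test service gets a non-standard host port and `langchain`/`langchain` credentials. A hedged sketch of test code connecting to such a remapped Redis instance (the port below is a placeholder; the real mapping lives in the compose file):

```python
import redis

# Hypothetical: Redis is remapped to a non-standard host port by the compose
# file; 6020 here is a placeholder for whatever port the file assigns.
client = redis.Redis(host="localhost", port=6020)
assert client.ping()
```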

View File

@@ -241,6 +241,7 @@ Dependents stats for `langchain-ai/langchain`
|[alejandro-ao/langchain-ask-pdf](https://github.com/alejandro-ao/langchain-ask-pdf) | 514 |
|[sajjadium/ctf-archives](https://github.com/sajjadium/ctf-archives) | 507 |
|[continuum-llms/chatgpt-memory](https://github.com/continuum-llms/chatgpt-memory) | 502 |
|[llmOS/opencopilot](https://github.com/llmOS/opencopilot) | 495 |
|[steamship-core/steamship-langchain](https://github.com/steamship-core/steamship-langchain) | 494 |
|[mpaepper/content-chatbot](https://github.com/mpaepper/content-chatbot) | 493 |
|[langchain-ai/langchain-aiplugin](https://github.com/langchain-ai/langchain-aiplugin) | 492 |
@@ -454,6 +455,7 @@ Dependents stats for `langchain-ai/langchain`
|[Teahouse-Studios/akari-bot](https://github.com/Teahouse-Studios/akari-bot) | 149 |
|[realminchoi/babyagi-ui](https://github.com/realminchoi/babyagi-ui) | 148 |
|[ssheng/BentoChain](https://github.com/ssheng/BentoChain) | 148 |
|[lmstudio-ai/examples](https://github.com/lmstudio-ai/examples) | 147 |
|[solana-labs/chatgpt-plugin](https://github.com/solana-labs/chatgpt-plugin) | 147 |
|[aurelio-labs/arxiv-bot](https://github.com/aurelio-labs/arxiv-bot) | 147 |
|[Jaseci-Labs/jaseci](https://github.com/Jaseci-Labs/jaseci) | 146 |

View File

@@ -7,7 +7,7 @@
### Introduction to LangChain with Harrison Chase, creator of LangChain
- [Building the Future with LLMs, `LangChain`, & `Pinecone`](https://youtu.be/nMniwlGyX-c) by [Pinecone](https://www.youtube.com/@pinecone-io)
- [LangChain and Weaviate with Harrison Chase and Bob van Luijt - Weaviate Podcast #36](https://youtu.be/lhby7Ql7hbk) by [Weaviate • Vector Database](https://www.youtube.com/@Weaviate)
- [LangChain Demo + Q&A with Harrison Chase](https://youtu.be/zaYTXQFR0_s?t=788) by [Full Stack Deep Learning](https://www.youtube.com/@The_Full_Stack)
- [LangChain Demo + Q&A with Harrison Chase](https://youtu.be/zaYTXQFR0_s?t=788) by [Full Stack Deep Learning](https://www.youtube.com/@FullStackDeepLearning)
- [LangChain Agents: Build Personal Assistants For Your Data (Q&A with Harrison Chase and Mayo Oshin)](https://youtu.be/gVkF8cwfBLI) by [Chat with data](https://www.youtube.com/@chatwithdata)
## Videos (sorted by views)
@@ -15,8 +15,8 @@
- [Using `ChatGPT` with YOUR OWN Data. This is magical. (LangChain OpenAI API)](https://youtu.be/9AXP7tCI9PI) by [TechLead](https://www.youtube.com/@TechLead)
- [First look - `ChatGPT` + `WolframAlpha` (`GPT-3.5` and Wolfram|Alpha via LangChain by James Weaver)](https://youtu.be/wYGbY811oMo) by [Dr Alan D. Thompson](https://www.youtube.com/@DrAlanDThompson)
- [LangChain explained - The hottest new Python framework](https://youtu.be/RoR4XJw8wIc) by [AssemblyAI](https://www.youtube.com/@AssemblyAI)
- [Chatbot with INFINITE MEMORY using `OpenAI` & `Pinecone` - `GPT-3`, `Embeddings`, `ADA`, `Vector DB`, `Semantic`](https://youtu.be/2xNzB7xq8nk) by [David Shapiro ~ AI](https://www.youtube.com/@DaveShap)
- [LangChain for LLMs is... basically just an Ansible playbook](https://youtu.be/X51N9C-OhlE) by [David Shapiro ~ AI](https://www.youtube.com/@DaveShap)
- [Chatbot with INFINITE MEMORY using `OpenAI` & `Pinecone` - `GPT-3`, `Embeddings`, `ADA`, `Vector DB`, `Semantic`](https://youtu.be/2xNzB7xq8nk) by [David Shapiro ~ AI](https://www.youtube.com/@DavidShapiroAutomator)
- [LangChain for LLMs is... basically just an Ansible playbook](https://youtu.be/X51N9C-OhlE) by [David Shapiro ~ AI](https://www.youtube.com/@DavidShapiroAutomator)
- [Build your own LLM Apps with LangChain & `GPT-Index`](https://youtu.be/-75p09zFUJY) by [1littlecoder](https://www.youtube.com/@1littlecoder)
- [`BabyAGI` - New System of Autonomous AI Agents with LangChain](https://youtu.be/lg3kJvf1kXo) by [1littlecoder](https://www.youtube.com/@1littlecoder)
- [Run `BabyAGI` with Langchain Agents (with Python Code)](https://youtu.be/WosPGHPObx8) by [1littlecoder](https://www.youtube.com/@1littlecoder)
@@ -37,15 +37,15 @@
- [Building AI LLM Apps with LangChain (and more?) - LIVE STREAM](https://www.youtube.com/live/M-2Cj_2fzWI?feature=share) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
- [`ChatGPT` with any `YouTube` video using langchain and `chromadb`](https://youtu.be/TQZfB2bzVwU) by [echohive](https://www.youtube.com/@echohive)
- [How to Talk to a `PDF` using LangChain and `ChatGPT`](https://youtu.be/v2i1YDtrIwk) by [Automata Learning Lab](https://www.youtube.com/@automatalearninglab)
- [Langchain Document Loaders Part 1: Unstructured Files](https://youtu.be/O5C0wfsen98) by [Merk](https://www.youtube.com/@heymichaeldaigler)
- [LangChain - Prompt Templates (what all the best prompt engineers use)](https://youtu.be/1aRu8b0XNOQ) by [Nick Daigler](https://www.youtube.com/@nickdaigler)
- [Langchain Document Loaders Part 1: Unstructured Files](https://youtu.be/O5C0wfsen98) by [Merk](https://www.youtube.com/@merksworld)
- [LangChain - Prompt Templates (what all the best prompt engineers use)](https://youtu.be/1aRu8b0XNOQ) by [Nick Daigler](https://www.youtube.com/@nick_daigs)
- [LangChain. Crear aplicaciones Python impulsadas por GPT](https://youtu.be/DkW_rDndts8) by [Jesús Conde](https://www.youtube.com/@0utKast)
- [Easiest Way to Use GPT In Your Products | LangChain Basics Tutorial](https://youtu.be/fLy0VenZyGc) by [Rachel Woods](https://www.youtube.com/@therachelwoods)
- [`BabyAGI` + `GPT-4` Langchain Agent with Internet Access](https://youtu.be/wx1z_hs5P6E) by [tylerwhatsgood](https://www.youtube.com/@tylerwhatsgood)
- [Learning LLM Agents. How does it actually work? LangChain, AutoGPT & OpenAI](https://youtu.be/mb_YAABSplk) by [Arnoldas Kemeklis](https://www.youtube.com/@processusAI)
- [Get Started with LangChain in `Node.js`](https://youtu.be/Wxx1KUWJFv4) by [Developers Digest](https://www.youtube.com/@DevelopersDigest)
- [LangChain + `OpenAI` tutorial: Building a Q&A system w/ own text data](https://youtu.be/DYOU_Z0hAwo) by [Samuel Chan](https://www.youtube.com/@SamuelChan)
- [Langchain + `Zapier` Agent](https://youtu.be/yribLAb-pxA) by [Merk](https://www.youtube.com/@heymichaeldaigler)
- [Langchain + `Zapier` Agent](https://youtu.be/yribLAb-pxA) by [Merk](https://www.youtube.com/@merksworld)
- [Connecting the Internet with `ChatGPT` (LLMs) using Langchain And Answers Your Questions](https://youtu.be/9Y0TBC63yZg) by [Kamalraj M M](https://www.youtube.com/@insightbuilder)
- [Build More Powerful LLM Applications for Businesss with LangChain (Beginners Guide)](https://youtu.be/sp3-WLKEcBg) by [No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
- [LangFlow LLM Agent Demo for 🦜🔗LangChain](https://youtu.be/zJxDHaWt-6o) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
@@ -82,7 +82,7 @@
- [Build a LangChain-based Semantic PDF Search App with No-Code Tools Bubble and Flowise](https://youtu.be/s33v5cIeqA4) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
- [LangChain Memory Tutorial | Building a ChatGPT Clone in Python](https://youtu.be/Cwq91cj2Pnc) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
- [ChatGPT For Your DATA | Chat with Multiple Documents Using LangChain](https://youtu.be/TeDgIDqQmzs) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
- [`Llama Index`: Chat with Documentation using URL Loader](https://youtu.be/XJRoDEctAwA) by [Merk](https://www.youtube.com/@heymichaeldaigler)
- [`Llama Index`: Chat with Documentation using URL Loader](https://youtu.be/XJRoDEctAwA) by [Merk](https://www.youtube.com/@merksworld)
- [Using OpenAI, LangChain, and `Gradio` to Build Custom GenAI Applications](https://youtu.be/1MsmqMg3yUc) by [David Hundley](https://www.youtube.com/@dkhundley)
- [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
- [Build AI chatbot with custom knowledge base using OpenAI API and GPT Index](https://youtu.be/vDZAZuaXf48) by [Irina Nik](https://www.youtube.com/@irina_nik)
@@ -93,7 +93,7 @@
- [Build a Custom Chatbot with OpenAI: `GPT-Index` & LangChain | Step-by-Step Tutorial](https://youtu.be/FIDv6nc4CgU) by [Fabrikod](https://www.youtube.com/@fabrikod)
- [`Flowise` is an open-source no-code UI visual tool to build 🦜🔗LangChain applications](https://youtu.be/CovAPtQPU0k) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
- [LangChain & GPT 4 For Data Analysis: The `Pandas` Dataframe Agent](https://youtu.be/rFQ5Kmkd4jc) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
- [`GirlfriendGPT` - AI girlfriend with LangChain](https://youtu.be/LiN3D1QZGQw) by [Girlfriend GPT](https://www.youtube.com/@girlfriendGPT)
- [`GirlfriendGPT` - AI girlfriend with LangChain](https://youtu.be/LiN3D1QZGQw) by [Toolfinder AI](https://www.youtube.com/@toolfinderai)
- [How to build with Langchain 10x easier | ⛓️ LangFlow & `Flowise`](https://youtu.be/Ya1oGL7ZTvU) by [AI Jason](https://www.youtube.com/@AIJasonZ)
- [Getting Started With LangChain In 20 Minutes- Build Celebrity Search Application](https://youtu.be/_FpT1cwcSLg) by [Krish Naik](https://www.youtube.com/@krishnaik06)
- ⛓ [Vector Embeddings Tutorial Code Your Own AI Assistant with `GPT-4 API` + LangChain + NLP](https://youtu.be/yfHHvmaMkcA?si=5uJhxoh2tvdnOXok) by [FreeCodeCamp.org](https://www.youtube.com/@freecodecamp)
@@ -109,7 +109,7 @@
- ⛓ [PyData Heidelberg #11 - TimeSeries Forecasting & LLM Langchain](https://www.youtube.com/live/Glbwb5Hxu18?si=PIEY8Raq_C9PCHuW) by [PyData](https://www.youtube.com/@PyDataTV)
- ⛓ [Prompt Engineering in Web Development | Using LangChain and Templates with OpenAI](https://youtu.be/pK6WzlTOlYw?si=fkcDQsBG2h-DM8uQ) by [Akamai Developer](https://www.youtube.com/@AkamaiDeveloper)
- ⛓ [Retrieval-Augmented Generation (RAG) using LangChain and `Pinecone` - The RAG Special Episode](https://youtu.be/J_tCD_J6w3s?si=60Mnr5VD9UED9bGG) by [Generative AI and Data Science On AWS](https://www.youtube.com/@GenerativeAIOnAWS)
- ⛓ [Retrieval-Augmented Generation (RAG) using LangChain and `Pinecone` - The RAG Special Episode](https://youtu.be/J_tCD_J6w3s?si=60Mnr5VD9UED9bGG) by [Generative AI and Data Science On AWS](https://www.youtube.com/@GenerativeAIDataScienceOnAWS)
- ⛓ [`LLAMA2 70b-chat` Multiple Documents Chatbot with Langchain & Streamlit |All OPEN SOURCE|Replicate API](https://youtu.be/vhghB81vViM?si=dszzJnArMeac7lyc) by [DataInsightEdge](https://www.youtube.com/@DataInsightEdge01)
- ⛓ [Chatting with 44K Fashion Products: LangChain Opportunities and Pitfalls](https://youtu.be/Zudgske0F_s?si=8HSshHoEhh0PemJA) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
- ⛓ [Structured Data Extraction from `ChatGPT` with LangChain](https://youtu.be/q1lYg8JISpQ?si=0HctzOHYZvq62sve) by [MG](https://www.youtube.com/@MG_cafe)

View File

@@ -220,7 +220,7 @@
"id": "637f994a-5134-402a-bcf0-4de3911eaf49",
"metadata": {},
"source": [
":::{.callout-tip}\n",
":::tip\n",
"\n",
"[LangSmith trace](https://smith.langchain.com/public/60909eae-f4f1-43eb-9f96-354f5176f66f/r)\n",
"\n",
@@ -388,7 +388,7 @@
"id": "5a7e498b-dc68-4267-a35c-90ceffa91c46",
"metadata": {},
"source": [
":::{.callout-tip}\n",
":::tip\n",
"\n",
"[LangSmith trace](https://smith.langchain.com/public/3b27d47f-e4df-4afb-81b1-0f88b80ca97e/r)\n",
"\n",

View File

@@ -40,33 +40,6 @@
"%pip install --upgrade --quiet langchain-core langchain-community langchain-openai"
]
},
{
"cell_type": "markdown",
"id": "c3d54f72",
"metadata": {},
"source": [
"```{=mdx}\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
"\n",
"<ChatModelTabs openaiParams={`model=\"gpt-4\"`} />\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9eed8e8",
"metadata": {},
"outputs": [],
"source": [
"# | output: false\n",
"# | echo: false\n",
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(model=\"gpt-4\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
@@ -87,8 +60,10 @@
"source": [
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"prompt = ChatPromptTemplate.from_template(\"tell me a short joke about {topic}\")\n",
"model = ChatOpenAI(model=\"gpt-4\")\n",
"output_parser = StrOutputParser()\n",
"\n",
"chain = prompt | model | output_parser\n",
@@ -101,15 +76,15 @@
"id": "81c502c5-85ee-4f36-aaf4-d6e350b7792f",
"metadata": {},
"source": [
"Notice this line of the code, where we piece together these different components into a single chain using LCEL:\n",
"Notice this line of this code, where we piece together then different components into a single chain using LCEL:\n",
"\n",
"```\n",
"chain = prompt | model | output_parser\n",
"```\n",
"\n",
"The `|` symbol is similar to a [unix pipe operator](https://en.wikipedia.org/wiki/Pipeline_(Unix)), which chains together the different components, feeding the output from one component as input into the next component. \n",
"The `|` symbol is similar to a [unix pipe operator](https://en.wikipedia.org/wiki/Pipeline_(Unix)), which chains together the different components feeds the output from one component as input into the next component. \n",
"\n",
"In this chain the user input is passed to the prompt template, then the prompt template output is passed to the model, then the model output is passed to the output parser. Let's take a look at each component individually to really understand what's going on."
"In this chain the user input is passed to the prompt template, then the prompt template output is passed to the model, then the model output is passed to the output parser. Let's take a look at each component individually to really understand what's going on. "
]
},
{
@@ -258,7 +233,7 @@
"### 3. Output parser\n",
"\n",
"And lastly we pass our `model` output to the `output_parser`, which is a `BaseOutputParser` meaning it takes either a string or a \n",
"`BaseMessage` as input. The specific `StrOutputParser` simply converts any input into a string."
"`BaseMessage` as input. The `StrOutputParser` specifically simple converts any input into a string."
]
},
{
@@ -318,7 +293,7 @@
"source": [
":::info\n",
"\n",
"Note that if youre curious about the output of any components, you can always test out a smaller version of the chain such as `prompt` or `prompt | model` to see the intermediate results:\n",
"Note that if youre curious about the output of any components, you can always test out a smaller version of the chain such as `prompt` or `prompt | model` to see the intermediate results:\n",
"\n",
":::"
]
@@ -346,17 +321,7 @@
"source": [
"## RAG Search Example\n",
"\n",
"For our next example, we want to run a retrieval-augmented generation chain to add some context when responding to questions."
]
},
{
"cell_type": "markdown",
"id": "b8fe8eb4",
"metadata": {},
"source": [
"```{=mdx}\n",
"<ChatModelTabs />\n",
"```"
"For our next example, we want to run a retrieval-augmented generation chain to add some context when responding to questions. "
]
},
{
@@ -373,7 +338,7 @@
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnableParallel, RunnablePassthrough\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"\n",
"vectorstore = DocArrayInMemorySearch.from_texts(\n",
" [\"harrison worked at kensho\", \"bears like to eat honey\"],\n",
@@ -387,6 +352,7 @@
"Question: {question}\n",
"\"\"\"\n",
"prompt = ChatPromptTemplate.from_template(template)\n",
"model = ChatOpenAI()\n",
"output_parser = StrOutputParser()\n",
"\n",
"setup_and_retrieval = RunnableParallel(\n",
@@ -484,7 +450,7 @@
"With the flow being:\n",
"\n",
"1. The first steps create a `RunnableParallel` object with two entries. The first entry, `context` will include the document results fetched by the retriever. The second entry, `question` will contain the users original question. To pass on the question, we use `RunnablePassthrough` to copy this entry. \n",
"2. Feed the dictionary from the step above to the `prompt` component. It then takes the user input which is `question` as well as the retrieved document which is `context` to construct a prompt and output a PromptValue. \n",
"2. Feed the dictionary from the step above to the `prompt` component. It then takes the user input which is `question` as well as the retrieved document which is `context` to construct a prompt and output a PromptValue. \n",
"3. The `model` component takes the generated prompt, and passes into the OpenAI LLM model for evaluation. The generated output from the model is a `ChatMessage` object. \n",
"4. Finally, the `output_parser` component takes in a `ChatMessage`, and transforms this into a Python string, which is returned from the invoke method.\n",
"\n",
@@ -529,7 +495,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
"version": "3.11.4"
}
},
"nbformat": 4,

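The quickstart notebook in this hunk centres on `chain = prompt | model | output_parser`. As a plain-Python reading of what that pipe composition means (a sketch of the semantics, not LCEL's actual implementation), using the notebook's `prompt`, `model`, and `output_parser`:

```python
# What `prompt | model | output_parser` amounts to, one step at a time.
def chain_invoke(inputs: dict) -> str:
    prompt_value = prompt.invoke(inputs)  # dict -> PromptValue
    message = model.invoke(prompt_value)  # PromptValue -> ChatMessage
    return output_parser.invoke(message)  # ChatMessage -> str

chain_invoke({"topic": "ice cream"})
```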
View File

@@ -552,7 +552,7 @@
"id": "da3d1feb-b4bb-4624-961c-7db2e1180df7",
"metadata": {},
"source": [
":::{.callout-tip}\n",
":::tip\n",
"\n",
"[Langsmith trace](https://smith.langchain.com/public/bd73e122-6ec1-48b2-82df-e6483dc9cb63/r)\n",
"\n",

File diff suppressed because it is too large

View File

@@ -14,7 +14,7 @@ That's a fair amount to cover! Let's dive in.
### Jupyter Notebook
This guide (and most of the other guides in the documentation) uses [Jupyter notebooks](https://jupyter.org/) and assumes the reader is as well. Jupyter notebooks are perfect for learning how to work with LLM systems because oftentimes things can go wrong (unexpected output, API down, etc) and going through guides in an interactive environment is a great way to better understand them.
This guide (and most of the other guides in the documentation) use [Jupyter notebooks](https://jupyter.org/) and assume the reader is as well. Jupyter notebooks are perfect for learning how to work with LLM systems because often times things can go wrong (unexpected output, API down, etc) and going through guides in an interactive environment is a great way to better understand them.
You do not NEED to go through the guide in a Jupyter Notebook, but it is recommended. See [here](https://jupyter.org/install) for instructions on how to install.
@@ -184,8 +184,8 @@ Let's ask it what LangSmith is - this is something that wasn't present in the tr
llm.invoke("how can langsmith help with testing?")
```
We can also guide its response with a prompt template.
Prompt templates convert raw user input to better input to the LLM.
We can also guide it's response with a prompt template.
Prompt templates are used to convert raw user input to a better input to the LLM.
```python
from langchain_core.prompts import ChatPromptTemplate
@@ -234,7 +234,7 @@ We've now successfully set up a basic LLM chain. We only touched on the basics o
## Retrieval Chain
To properly answer the original question ("how can langsmith help with testing?"), we need to provide additional context to the LLM.
In order to properly answer the original question ("how can langsmith help with testing?"), we need to provide additional context to the LLM.
We can do this via *retrieval*.
Retrieval is useful when you have **too much data** to pass to the LLM directly.
You can then use a retriever to fetch only the most relevant pieces and pass those in.
@@ -242,7 +242,7 @@ You can then use a retriever to fetch only the most relevant pieces and pass tho
In this process, we will look up relevant documents from a *Retriever* and then pass them into the prompt.
A Retriever can be backed by anything - a SQL table, the internet, etc - but in this instance we will populate a vector store and use that as a retriever. For more information on vectorstores, see [this documentation](/docs/modules/data_connection/vectorstores).
First, we need to load the data that we want to index. To do this, we will use the WebBaseLoader. This requires installing [BeautifulSoup](https://beautiful-soup-4.readthedocs.io/en/latest/):
First, we need to load the data that we want to index. In order to do this, we will use the WebBaseLoader. This requires installing [BeautifulSoup](https://beautiful-soup-4.readthedocs.io/en/latest/):
```shell
pip install beautifulsoup4
@@ -349,7 +349,7 @@ document_chain.invoke({
```
However, we want the documents to first come from the retriever we just set up.
That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.
That way, for a given question we can use the retriever to dynamically select the most relevant documents and pass those in.
```python
from langchain.chains import create_retrieval_chain
@@ -395,12 +395,12 @@ from langchain_core.prompts import MessagesPlaceholder
prompt = ChatPromptTemplate.from_messages([
MessagesPlaceholder(variable_name="chat_history"),
("user", "{input}"),
("user", "Given the above conversation, generate a search query to look up to get information relevant to the conversation")
("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
])
retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
```
We can test this out by passing in an instance where the user asks a follow-up question.
We can test this out by passing in an instance where the user is asking a follow up question.
```python
from langchain_core.messages import HumanMessage, AIMessage
@@ -411,7 +411,7 @@ retriever_chain.invoke({
"input": "Tell me how"
})
```
You should see that this returns documents about testing in LangSmith. This is because the LLM generated a new query, combining the chat history with the follow-up question.
You should see that this returns documents about testing in LangSmith. This is because the LLM generated a new query, combining the chat history with the follow up question.
Now that we have this new retriever, we can create a new chain to continue the conversation with these retrieved documents in mind.
@@ -439,7 +439,7 @@ We can see that this gives a coherent answer - we've successfully turned our ret
## Agent
We've so far created examples of chains - where each step is known ahead of time.
We've so far create examples of chains - where each step is known ahead of time.
The final thing we will create is an agent - where the LLM decides what steps to take.
**NOTE: for this example we will only show how to create an agent using OpenAI models, as local models are not reliable enough yet.**
@@ -448,7 +448,7 @@ One of the first things to do when building an agent is to decide what tools it
For this example, we will give the agent access to two tools:
1. The retriever we just created. This will let it easily answer questions about LangSmith
2. A search tool. This will let it easily answer questions that require up-to-date information.
2. A search tool. This will let it easily answer questions that require up to date information.
First, let's set up a tool for the retriever we just created:
@@ -488,11 +488,6 @@ Install langchain hub first
```bash
pip install langchainhub
```
Install the langchain-openai package
To interact with OpenAI, we need to use langchain-openai, which connects with the OpenAI SDK (https://github.com/langchain-ai/langchain/tree/master/libs/partners/openai).
```bash
pip install langchain-openai
```
Now we can use it to get a predefined prompt
@@ -504,8 +499,6 @@ from langchain.agents import AgentExecutor
# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/openai-functions-agent")
# You need to set OPENAI_API_KEY environment variable or pass it as argument `openai_api_key`.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
agent = create_openai_functions_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
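Pulling the quickstart's retrieval pieces together: assuming `retrieval_chain` was built from the history-aware retriever and the document chain as earlier in this guide, the follow-up-question test reads roughly like this sketch:

```python
from langchain_core.messages import AIMessage, HumanMessage

# A follow-up turn: prior chat history plus a terse new question that only
# makes sense in context.
chat_history = [
    HumanMessage(content="Can LangSmith help test my LLM applications?"),
    AIMessage(content="Yes!"),
]
response = retrieval_chain.invoke(
    {
        "chat_history": chat_history,
        "input": "Tell me how",
    }
)
print(response["answer"])
```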

View File

@@ -17,7 +17,7 @@ Here's a summary of the key methods and properties of a comparison evaluator:
- `requires_reference`: This property specifies whether this evaluator requires a reference label.
:::note LangSmith Support
The [run_on_dataset](https://api.python.langchain.com/en/latest/langchain_api_reference.html#module-langchain.smith) evaluation method is designed to evaluate only a single model at a time, and thus, doesn't support these evaluators.
The [run_on_dataset](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.smith) evaluation method is designed to evaluate only a single model at a time, and thus, doesn't support these evaluators.
:::
Detailed information about creating custom evaluators and the available built-in comparison evaluators is provided in the following sections.
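For concreteness, a built-in comparison evaluator is used roughly as follows; a hedged sketch (the `pairwise_string` evaluator shown here defaults to an OpenAI model, so an API key is assumed):

```python
from langchain.evaluation import load_evaluator

# Illustrative: compare two candidate outputs for the same input.
evaluator = load_evaluator("pairwise_string")
result = evaluator.evaluate_string_pairs(
    prediction="Paris is the capital of France.",
    prediction_b="France's capital is Paris, a city on the Seine.",
    input="What is the capital of France?",
)
print(result)  # a dict including the preferred output and reasoning
```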

View File

@@ -37,6 +37,6 @@ Check out the docs for examples and leaderboard information.
## Reference Docs
For detailed information on the available evaluators, including how to instantiate, configure, and customize them, check out the [reference documentation](https://api.python.langchain.com/en/latest/langchain_api_reference.html#module-langchain.evaluation) directly.
For detailed information on the available evaluators, including how to instantiate, configure, and customize them, check out the [reference documentation](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.evaluation) directly.
<DocCardList />

View File

@@ -88,6 +88,11 @@ constitutional_chain.run(question="How can I steal kittens?")
## Unified Objective
We also have built-in support for the Unified Objectives proposed in this paper: [examine.dev/docs/Unified_objectives.pdf](https://examine.dev/docs/Unified_objectives.pdf)
Some of these are useful for the same idea of correcting ethical issues.
```python
principles = ConstitutionalChain.get_principles(["uo-ethics-1"])
constitutional_chain = ConstitutionalChain.from_llm(
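The hunk above is cut off mid-call. For orientation, a typical unified-objectives setup looks roughly like this hedged sketch, assuming an existing `llm` and an `LLMChain` named `qa_chain` from earlier in the page:

```python
from langchain.chains import ConstitutionalChain

# Hedged sketch: wrap an existing chain with the "uo-ethics-1" principle so
# its outputs are critiqued and revised against that objective.
principles = ConstitutionalChain.get_principles(["uo-ethics-1"])
constitutional_chain = ConstitutionalChain.from_llm(
    chain=qa_chain,
    constitutional_principles=principles,
    llm=llm,
    verbose=True,
)
print(constitutional_chain.run(question="How can I steal kittens?"))
```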

View File

@@ -40,7 +40,7 @@
"You must [deploy a model on Azure ML](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-foundation-models?view=azureml-api-2#deploying-foundation-models-to-endpoints-for-inferencing) or [to Azure AI studio](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-open) and obtain the following parameters:\n",
"\n",
"* `endpoint_url`: The REST endpoint url provided by the endpoint.\n",
"* `endpoint_api_type`: Use `endpoint_type='dedicated'` when deploying models to **Dedicated endpoints** (hosted managed infrastructure). Use `endpoint_type='serverless'` when deploying models using the **Pay-as-you-go** offering (model as a service).\n",
"* `endpoint_api_type`: Use `endpoint_type='realtime'` when deploying models to **Realtime endpoints** (hosted managed infrastructure). Use `endpoint_type='serverless'` when deploying models using the **Pay-as-you-go** offering (model as a service).\n",
"* `endpoint_api_key`: The API key provided by the endpoint"
]
},
@@ -52,9 +52,9 @@
"\n",
"The `content_formatter` parameter is a handler class for transforming the request and response of an AzureML endpoint to match with required schema. Since there are a wide range of models in the model catalog, each of which may process data differently from one another, a `ContentFormatterBase` class is provided to allow users to transform data to their liking. The following content formatters are provided:\n",
"\n",
"* `CustomOpenAIChatContentFormatter`: Formats request and response data for models like LLaMa2-chat that follow the OpenAI API spec for request and response.\n",
"* `LLamaChatContentFormatter`: Formats request and response data for LLaMa2-chat\n",
"\n",
"*Note: `langchain.chat_models.azureml_endpoint.LlamaChatContentFormatter` is being deprecated and replaced with `langchain.chat_models.azureml_endpoint.CustomOpenAIChatContentFormatter`.*\n",
"*Note: `langchain.chat_models.azureml_endpoint.LLamaContentFormatter` is being deprecated and replaced with `langchain.chat_models.azureml_endpoint.LLamaChatContentFormatter`.*\n",
"\n",
"You can implement custom content formatters specific for your model deriving from the class `langchain_community.llms.azureml_endpoint.ContentFormatterBase`."
]
@@ -65,7 +65,20 @@
"source": [
"## Examples\n",
"\n",
"The following section contains examples about how to use this class:"
"The following section cotain examples about how to use this class:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.chat_models.azureml_endpoint import (\n",
" AzureMLEndpointApiType,\n",
" LlamaChatContentFormatter,\n",
")\n",
"from langchain_core.messages import HumanMessage"
]
},
{
@@ -92,17 +105,14 @@
}
],
"source": [
"from langchain_community.chat_models.azureml_endpoint import (\n",
" AzureMLEndpointApiType,\n",
" CustomOpenAIChatContentFormatter,\n",
")\n",
"from langchain_community.chat_models.azureml_endpoint import LlamaContentFormatter\n",
"from langchain_core.messages import HumanMessage\n",
"\n",
"chat = AzureMLChatOnlineEndpoint(\n",
" endpoint_url=\"https://<your-endpoint>.<your_region>.inference.ml.azure.com/score\",\n",
" endpoint_api_type=AzureMLEndpointApiType.dedicated,\n",
" endpoint_api_type=AzureMLEndpointApiType.realtime,\n",
" endpoint_api_key=\"my-api-key\",\n",
" content_formatter=CustomOpenAIChatContentFormatter(),\n",
" content_formatter=LlamaChatContentFormatter(),\n",
")\n",
"response = chat.invoke(\n",
" [HumanMessage(content=\"Will the Collatz conjecture ever be solved?\")]\n",
@@ -127,7 +137,7 @@
" endpoint_url=\"https://<your-endpoint>.<your_region>.inference.ml.azure.com/v1/chat/completions\",\n",
" endpoint_api_type=AzureMLEndpointApiType.serverless,\n",
" endpoint_api_key=\"my-api-key\",\n",
" content_formatter=CustomOpenAIChatContentFormatter,\n",
" content_formatter=LlamaChatContentFormatter,\n",
")\n",
"response = chat.invoke(\n",
" [HumanMessage(content=\"Will the Collatz conjecture ever be solved?\")]\n",
@@ -139,7 +149,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"If you need to pass additional parameters to the model, use `model_kwargs` argument:"
"If you need to pass additional parameters to the model, use `model_kwards` argument:"
]
},
{
@@ -152,7 +162,7 @@
" endpoint_url=\"https://<your-endpoint>.<your_region>.inference.ml.azure.com/v1/chat/completions\",\n",
" endpoint_api_type=AzureMLEndpointApiType.serverless,\n",
" endpoint_api_key=\"my-api-key\",\n",
" content_formatter=CustomOpenAIChatContentFormatter,\n",
" content_formatter=LlamaChatContentFormatter,\n",
" model_kwargs={\"temperature\": 0.8},\n",
")"
]
@@ -194,7 +204,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.12"
}
},
"nbformat": 4,

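For readers following the renames in these hunks (`AzureMLEndpointApiType.realtime` to `.dedicated`, `LlamaChatContentFormatter` to `CustomOpenAIChatContentFormatter`): assembled from the hunk's own lines, the dedicated-endpoint example reads as follows against the newer names (endpoint URL and key are placeholders):

```python
from langchain_community.chat_models.azureml_endpoint import (
    AzureMLChatOnlineEndpoint,
    AzureMLEndpointApiType,
    CustomOpenAIChatContentFormatter,
)
from langchain_core.messages import HumanMessage

# Placeholders: substitute your endpoint URL and API key.
chat = AzureMLChatOnlineEndpoint(
    endpoint_url="https://<your-endpoint>.<your_region>.inference.ml.azure.com/score",
    endpoint_api_type=AzureMLEndpointApiType.dedicated,
    endpoint_api_key="my-api-key",
    content_formatter=CustomOpenAIChatContentFormatter(),
)
response = chat.invoke(
    [HumanMessage(content="Will the Collatz conjecture ever be solved?")]
)
```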
View File

@@ -1,86 +0,0 @@
{
"cells": [
{
"cell_type": "raw",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Moonshot\n",
"---"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# MoonshotChat\n",
"\n",
"[Moonshot](https://platform.moonshot.cn/) is a Chinese startup that provides LLM service for companies and individuals.\n",
"\n",
"This example goes over how to use LangChain to interact with Moonshot Inference for Chat."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Generate your api key from: https://platform.moonshot.cn/console/api-keys\n",
"os.environ[\"MOONSHOT_API_KEY\"] = \"MOONSHOT_API_KEY\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.chat_models.moonshot import MoonshotChat\n",
"from langchain_core.messages import HumanMessage, SystemMessage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"chat = MoonshotChat()\n",
"# or use a specific model\n",
"# Available models: https://platform.moonshot.cn/docs\n",
"# chat = MoonshotChat(model=\"moonshot-v1-128k\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant that translates English to French.\"\n",
" ),\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to French. I love programming.\"\n",
" ),\n",
"]\n",
"\n",
"chat.invoke(messages)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,80 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"id": "a9667088-04e1-4f67-8221-a0072a2d635f",
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-06T17:04:59.273702Z",
"iopub.status.busy": "2024-03-06T17:04:59.272602Z",
"iopub.status.idle": "2024-03-06T17:05:00.129177Z",
"shell.execute_reply": "2024-03-06T17:05:00.124594Z",
"shell.execute_reply.started": "2024-03-06T17:04:59.273646Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='저는 대형 언어 모델 프로젝트를 구축하고 싶습니다.')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"os.environ[\"SOLAR_API_KEY\"] = \"SOLAR_API_KEY\"\n",
"\n",
"from langchain_community.chat_models.solar import SolarChat\n",
"from langchain_core.messages import HumanMessage, SystemMessage\n",
"\n",
"chat = SolarChat(max_tokens=1024)\n",
"\n",
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant who translates English to Korean.\"\n",
" ),\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to Korean. I want to build a project of large language model.\"\n",
" ),\n",
"]\n",
"\n",
"chat.invoke(messages)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8cb792fe-2844-4969-a9e9-f4c0f97b1699",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

View File

@@ -19,11 +19,9 @@
"1. `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib`\n",
"\n",
"## 🧑 Instructions for ingesting your Google Docs data\n",
"Set the environmental variable `GOOGLE_APPLICATION_CREDENTIALS` to an empty string (`\"\"`).\n",
"By default, the `GoogleDriveLoader` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable using the `credentials_path` keyword argument. Same thing with `token.json` - `token_path`. Note that `token.json` will be created automatically the first time you use the loader.\n",
"\n",
"By default, the `GoogleDriveLoader` expects the `credentials.json` file to be located at `~/.credentials/credentials.json`, but this is configurable using the `credentials_path` keyword argument. Same thing with `token.json` - default path: `~/.credentials/token.json`, constructor param: `token_path`.\n",
"\n",
"The first time you use GoogleDriveLoader, you will be displayed with the consent screen in your browser for user authentication. After authentication, `token.json` will be created automatically at the provided or the default path. Also, if there is already a `token.json` at that path, then you will not be prompted for authentication.\n",
"The first time you use GoogleDriveLoader, you will be displayed with the consent screen in your browser. If this doesn't happen and you get a `RefreshError`, do not use `credentials_path` in your `GoogleDriveLoader` constructor call. Instead, put that path in a `GOOGLE_APPLICATION_CREDENTIALS` environmental variable.\n",
"\n",
"`GoogleDriveLoader` can load from a list of Google Docs document ids or a folder id. You can obtain your folder and document id from the URL:\n",
"\n",

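The credentials discussion in the Google Drive loader section above is easier to follow next to a concrete call. A hedged sketch (the folder id is a placeholder, and the paths shown are the defaults the text describes):

```python
from langchain_community.document_loaders import GoogleDriveLoader

# Placeholder folder id; credentials_path/token_path shown at their defaults.
loader = GoogleDriveLoader(
    folder_id="<your-folder-id>",
    credentials_path="~/.credentials/credentials.json",
    token_path="~/.credentials/token.json",
)
docs = loader.load()
```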
View File

@@ -1,419 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7f5437a835409a57",
"metadata": {
"collapsed": false
},
"source": [
"# LLM Sherpa\n",
"\n",
"This notebook covers how to use `LLM Sherpa` to load files of many types. `LLM Sherpa` supports different file formats including DOCX, PPTX, HTML, TXT, and XML.\n",
"\n",
"`LLMSherpaFileLoader` use LayoutPDFReader, which is part of the LLMSherpa library. This tool is designed to parse PDFs while preserving their layout information, which is often lost when using most PDF to text parsers.\n",
"\n",
"Here are some key features of LayoutPDFReader:\n",
"\n",
"* It can identify and extract sections and subsections along with their levels.\n",
"* It combines lines to form paragraphs.\n",
"* It can identify links between sections and paragraphs.\n",
"* It can extract tables along with the section the tables are found in.\n",
"* It can identify and extract lists and nested lists.\n",
"* It can join content spread across pages.\n",
"* It can remove repeating headers and footers.\n",
"* It can remove watermarks.\n",
"\n",
"check [llmsherpa](https://llmsherpa.readthedocs.io/en/latest/) documentation.\n",
"\n",
"`INFO: this library fail with some pdf files so use it with caution.`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "initial_id",
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Install package\n",
"# !pip install --upgrade --quiet llmsherpa"
]
},
{
"cell_type": "markdown",
"id": "baa8d2672ac6dd4b",
"metadata": {
"collapsed": false
},
"source": [
"## LLMSherpaFileLoader\n",
"\n",
"Under the hood LLMSherpaFileLoader defined some strategist to load file content: [\"sections\", \"chunks\", \"html\", \"text\"], setup [nlm-ingestor](https://github.com/nlmatics/nlm-ingestor) to get `llmsherpa_api_url` or use the default."
]
},
{
"cell_type": "markdown",
"id": "6fb0104dde44091b",
"metadata": {
"collapsed": false
},
"source": [
"### sections strategy: return the file parsed into sections"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "14150b3110143a43",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T23:06:03.648268Z",
"start_time": "2024-03-28T23:05:51.734372Z"
},
"collapsed": false
},
"outputs": [],
"source": [
"from langchain_community.document_loaders.llmsherpa import LLMSherpaFileLoader\n",
"\n",
"loader = LLMSherpaFileLoader(\n",
" file_path=\"https://arxiv.org/pdf/2402.14207.pdf\",\n",
" new_indent_parser=True,\n",
" apply_ocr=True,\n",
" strategy=\"sections\",\n",
" llmsherpa_api_url=\"http://localhost:5010/api/parseDocument?renderFormat=all\",\n",
")\n",
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e639aa0010ed3579",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T23:06:11.568739Z",
"start_time": "2024-03-28T23:06:11.557702Z"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": "Document(page_content='Abstract\\nWe study how to apply large language models to write grounded and organized long-form articles from scratch, with comparable breadth and depth to Wikipedia pages.\\nThis underexplored problem poses new challenges at the pre-writing stage, including how to research the topic and prepare an outline prior to writing.\\nWe propose STORM, a writing system for the Synthesis of Topic Outlines through\\nReferences\\nFull-length Article\\nTopic\\nOutline\\n2022 Winter Olympics\\nOpening Ceremony\\nResearch via Question Asking\\nRetrieval and Multi-perspective Question Asking.\\nSTORM models the pre-writing stage by\\nLLM\\n(1) discovering diverse perspectives in researching the given topic, (2) simulating conversations where writers carrying different perspectives pose questions to a topic expert grounded on trusted Internet sources, (3) curating the collected information to create an outline.\\nFor evaluation, we curate FreshWiki, a dataset of recent high-quality Wikipedia articles, and formulate outline assessments to evaluate the pre-writing stage.\\nWe further gather feedback from experienced Wikipedia editors.\\nCompared to articles generated by an outlinedriven retrieval-augmented baseline, more of STORMs articles are deemed to be organized (by a 25% absolute increase) and broad in coverage (by 10%).\\nThe expert feedback also helps identify new challenges for generating grounded long articles, such as source bias transfer and over-association of unrelated facts.\\n1. Can you provide any information about the transportation arrangements for the opening ceremony?\\nLLM\\n2. Can you provide any information about the budget for the 2022 Winter Olympics opening ceremony?…\\nLLM- Role1\\nLLM- Role2\\nLLM- Role1', metadata={'source': 'https://arxiv.org/pdf/2402.14207.pdf', 'section_number': 1, 'section_title': 'Abstract'})"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs[1]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "818977c1a0505814",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T23:06:28.900386Z",
"start_time": "2024-03-28T23:06:28.891805Z"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": "79"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(docs)"
]
},
{
"cell_type": "markdown",
"id": "e424ce828ea64c01",
"metadata": {
"collapsed": false
},
"source": [
"### chunks strategy: return the file parsed into chunks"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4c0ff1a52b9dd4e3",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T23:06:44.507836Z",
"start_time": "2024-03-28T23:06:32.507326Z"
},
"collapsed": false
},
"outputs": [],
"source": [
"from langchain_community.document_loaders.llmsherpa import LLMSherpaFileLoader\n",
"\n",
"loader = LLMSherpaFileLoader(\n",
" file_path=\"https://arxiv.org/pdf/2402.14207.pdf\",\n",
" new_indent_parser=True,\n",
" apply_ocr=True,\n",
" strategy=\"chunks\",\n",
" llmsherpa_api_url=\"http://localhost:5010/api/parseDocument?renderFormat=all\",\n",
")\n",
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "33dc25e83f6e0430",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T23:06:49.951741Z",
"start_time": "2024-03-28T23:06:49.938331Z"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": "Document(page_content='Assisting in Writing Wikipedia-like Articles From Scratch with Large Language Models\\nStanford University {shaoyj, yuchengj, tkanell, peterxu, okhattab}@stanford.edu lam@cs.stanford.edu', metadata={'source': 'https://arxiv.org/pdf/2402.14207.pdf', 'chunk_number': 1, 'chunk_type': 'para'})"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs[1]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2310e24f3d081cb4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T23:06:56.933007Z",
"start_time": "2024-03-28T23:06:56.922196Z"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": "306"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(docs)"
]
},
{
"cell_type": "markdown",
"id": "6bb9b715b0d2b4b0",
"metadata": {
"collapsed": false
},
"source": [
"### html strategy: return the file as one html document"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f3fbe9f3c4d8a6ee",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T22:59:15.869599Z",
"start_time": "2024-03-28T22:58:54.306814Z"
},
"collapsed": false
},
"outputs": [],
"source": [
"from langchain_community.document_loaders.llmsherpa import LLMSherpaFileLoader\n",
"\n",
"loader = LLMSherpaFileLoader(\n",
" file_path=\"https://arxiv.org/pdf/2402.14207.pdf\",\n",
" new_indent_parser=True,\n",
" apply_ocr=True,\n",
" strategy=\"html\",\n",
" llmsherpa_api_url=\"http://localhost:5010/api/parseDocument?renderFormat=all\",\n",
")\n",
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b8fcbfcd58126e09",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T22:59:33.386455Z",
"start_time": "2024-03-28T22:59:33.381274Z"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": "'<html><h1>Assisting in Writing Wikipedia-like Articles From Scratch with Large Language Models</h1><table><th><td colSpan=1>Yijia Shao</td><td colSpan=1>Yucheng Jiang</td><td colSpan=1>Theodore A. Kanell</td><td colSpan=1>Peter Xu</td></th><tr><td colSpan=1></td><td colSpan=1>Omar Khattab</td><td colSpan=1>Monica S. Lam</td><td colSpan=1></td></tr></table><p>Stanford University {shaoyj, yuchengj, '"
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs[0].page_content[:400]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "8cbe691320144cf6",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T22:59:49.667979Z",
"start_time": "2024-03-28T22:59:49.661572Z"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": "1"
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(docs)"
]
},
{
"cell_type": "markdown",
"id": "634af5a1c58a7766",
"metadata": {
"collapsed": false
},
"source": [
"### text strategy: return the file as one text document"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "ee47c6e36c952534",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T23:04:56.549898Z",
"start_time": "2024-03-28T23:04:38.148264Z"
},
"collapsed": false
},
"outputs": [],
"source": [
"from langchain_community.document_loaders.llmsherpa import LLMSherpaFileLoader\n",
"\n",
"loader = LLMSherpaFileLoader(\n",
" file_path=\"https://arxiv.org/pdf/2402.14207.pdf\",\n",
" new_indent_parser=True,\n",
" apply_ocr=True,\n",
" strategy=\"text\",\n",
" llmsherpa_api_url=\"http://localhost:5010/api/parseDocument?renderFormat=all\",\n",
")\n",
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "998649675f14c50e",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T23:05:28.558467Z",
"start_time": "2024-03-28T23:05:28.543132Z"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": "'Assisting in Writing Wikipedia-like Articles From Scratch with Large Language Models\\n | Yijia Shao | Yucheng Jiang | Theodore A. Kanell | Peter Xu\\n | --- | --- | --- | ---\\n | | Omar Khattab | Monica S. Lam | \\n\\nStanford University {shaoyj, yuchengj, tkanell, peterxu, okhattab}@stanford.edu lam@cs.stanford.edu\\nAbstract\\nWe study how to apply large language models to write grounded and organized long'"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs[0].page_content[:400]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7fec7a95023ea8e9",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-28T23:05:39.207693Z",
"start_time": "2024-03-28T23:05:39.199663Z"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": "1"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(docs)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -62,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -73,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -107,7 +107,7 @@
"outputs": [],
"source": [
"loader = SitemapLoader(\n",
" web_path=\"https://api.python.langchain.com/sitemap.xml\",\n",
" web_path=\" https://api.python.langchain.com/sitemap.xml\",\n",
" filter_urls=[\"https://api.python.langchain.com/en/latest\"],\n",
")\n",
"documents = loader.load()"
@@ -247,7 +247,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.9.18"
}
},
"nbformat": 4,

View File

@@ -29,7 +29,7 @@
"You must [deploy a model on Azure ML](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-foundation-models?view=azureml-api-2#deploying-foundation-models-to-endpoints-for-inferencing) or [to Azure AI studio](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-open) and obtain the following parameters:\n",
"\n",
"* `endpoint_url`: The REST endpoint url provided by the endpoint.\n",
"* `endpoint_api_type`: Use `endpoint_type='dedicated'` when deploying models to **Dedicated endpoints** (hosted managed infrastructure). Use `endpoint_type='serverless'` when deploying models using the **Pay-as-you-go** offering (model as a service).\n",
"* `endpoint_api_type`: Use `endpoint_type='realtime'` when deploying models to **Realtime endpoints** (hosted managed infrastructure). Use `endpoint_type='serverless'` when deploying models using the **Pay-as-you-go** offering (model as a service).\n",
"* `endpoint_api_key`: The API key provided by the endpoint.\n",
"* `deployment_name`: (Optional) The deployment name of the model using the endpoint."
]
@@ -45,7 +45,7 @@
"* `GPT2ContentFormatter`: Formats request and response data for GPT2\n",
"* `DollyContentFormatter`: Formats request and response data for the Dolly-v2\n",
"* `HFContentFormatter`: Formats request and response data for text-generation Hugging Face models\n",
"* `CustomOpenAIContentFormatter`: Formats request and response data for models like LLaMa2 that follow OpenAI API compatible scheme.\n",
"* `LLamaContentFormatter`: Formats request and response data for LLaMa2\n",
"\n",
"*Note: `OSSContentFormatter` is being deprecated and replaced with `GPT2ContentFormatter`. The logic is the same but `GPT2ContentFormatter` is a more suitable name. You can still continue to use `OSSContentFormatter` as the changes are backwards compatible.*"
]
@@ -72,15 +72,15 @@
"source": [
"from langchain_community.llms.azureml_endpoint import (\n",
" AzureMLEndpointApiType,\n",
" CustomOpenAIContentFormatter,\n",
" LlamaContentFormatter,\n",
")\n",
"from langchain_core.messages import HumanMessage\n",
"\n",
"llm = AzureMLOnlineEndpoint(\n",
" endpoint_url=\"https://<your-endpoint>.<your_region>.inference.ml.azure.com/score\",\n",
" endpoint_api_type=AzureMLEndpointApiType.dedicated,\n",
" endpoint_api_type=AzureMLEndpointApiType.realtime,\n",
" endpoint_api_key=\"my-api-key\",\n",
" content_formatter=CustomOpenAIContentFormatter(),\n",
" content_formatter=LlamaContentFormatter(),\n",
" model_kwargs={\"temperature\": 0.8, \"max_new_tokens\": 400},\n",
")\n",
"response = llm.invoke(\"Write me a song about sparkling water:\")\n",
@@ -119,7 +119,7 @@
"source": [
"from langchain_community.llms.azureml_endpoint import (\n",
" AzureMLEndpointApiType,\n",
" CustomOpenAIContentFormatter,\n",
" LlamaContentFormatter,\n",
")\n",
"from langchain_core.messages import HumanMessage\n",
"\n",
@@ -127,7 +127,7 @@
" endpoint_url=\"https://<your-endpoint>.<your_region>.inference.ml.azure.com/v1/completions\",\n",
" endpoint_api_type=AzureMLEndpointApiType.serverless,\n",
" endpoint_api_key=\"my-api-key\",\n",
" content_formatter=CustomOpenAIContentFormatter(),\n",
" content_formatter=LlamaContentFormatter(),\n",
" model_kwargs={\"temperature\": 0.8, \"max_new_tokens\": 400},\n",
")\n",
"response = llm.invoke(\"Write me a song about sparkling water:\")\n",
@@ -181,7 +181,7 @@
"content_formatter = CustomFormatter()\n",
"\n",
"llm = AzureMLOnlineEndpoint(\n",
" endpoint_api_type=\"dedicated\",\n",
" endpoint_api_type=\"realtime\",\n",
" endpoint_api_key=os.getenv(\"BART_ENDPOINT_API_KEY\"),\n",
" endpoint_url=os.getenv(\"BART_ENDPOINT_URL\"),\n",
" model_kwargs={\"temperature\": 0.8, \"max_new_tokens\": 400},\n",

View File

@@ -4,11 +4,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# IPEX-LLM\n",
"# BigDL-LLM\n",
"\n",
"> [IPEX-LLM](https://github.com/intel-analytics/ipex-llm/) is a low-bit LLM optimization library on Intel XPU (Xeon/Core/Flex/Arc/Max). It can make LLMs run extremely fast and consume much less memory on Intel platforms. It is open sourced under Apache 2.0 License.\n",
"> [BigDL-LLM](https://github.com/intel-analytics/BigDL/) is a low-bit LLM optimization library on Intel XPU (Xeon/Core/Flex/Arc/Max). It can make LLMs run extremely fast and consume much less memory on Intel platforms. It is open sourced under Apache 2.0 License.\n",
"\n",
"This example goes over how to use LangChain to interact with IPEX-LLM for text generation. \n"
"This example goes over how to use LangChain to interact with BigDL-LLM for text generation. \n"
]
},
{
@@ -33,7 +33,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Install IEPX-LLM for running LLMs locally on Intel CPU."
"Install BigDL-LLM for running LLMs locally on Intel CPU."
]
},
{
@@ -42,7 +42,8 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install --pre --upgrade ipex-llm[all]"
"# Install BigDL\n",
"%pip install --pre --upgrade bigdl-llm[all]"
]
},
{
@@ -59,7 +60,7 @@
"outputs": [],
"source": [
"from langchain.chains import LLMChain\n",
"from langchain_community.llms import IpexLLM\n",
"from langchain_community.llms.bigdl import BigdlLLM\n",
"from langchain_core.prompts import PromptTemplate"
]
},
@@ -88,7 +89,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "27c08180714a44c7ab766624d5054163",
"model_id": "69e018750ffb4de1af22ce49cd6957f4",
"version_major": 2,
"version_minor": 0
},
@@ -103,12 +104,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
"2024-03-27 00:58:43,670 - INFO - Converting the current model to sym_int4 format......\n"
"2024-02-23 18:10:22,896 - INFO - Converting the current model to sym_int4 format......\n",
"2024-02-23 18:10:25,415 - INFO - BIGDL_OPT_IPEX: False\n"
]
}
],
"source": [
"llm = IpexLLM.from_model_id(\n",
"llm = BigdlLLM.from_model_id(\n",
" model_id=\"lmsys/vicuna-7b-v1.5\",\n",
" model_kwargs={\"temperature\": 0, \"max_length\": 64, \"trust_remote_code\": True},\n",
")"
@@ -133,10 +135,6 @@
"/opt/anaconda3/envs/shane-langchain2/lib/python3.9/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The function `run` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
" warn_deprecated(\n",
"/opt/anaconda3/envs/shane-langchain2/lib/python3.9/site-packages/transformers/generation/utils.py:1369: UserWarning: Using `max_length`'s default (4096) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
" warnings.warn(\n",
"/opt/anaconda3/envs/shane-langchain2/lib/python3.9/site-packages/ipex_llm/transformers/models/llama.py:218: UserWarning: Passing `padding_mask` is deprecated and will be removed in v4.37.Please make sure use `attention_mask` instead.`\n",
" warnings.warn(\n",
"/opt/anaconda3/envs/shane-langchain2/lib/python3.9/site-packages/ipex_llm/transformers/models/llama.py:218: UserWarning: Passing `padding_mask` is deprecated and will be removed in v4.37.Please make sure use `attention_mask` instead.`\n",
" warnings.warn(\n"
]
},
@@ -158,13 +156,6 @@
"question = \"What is AI?\"\n",
"output = llm_chain.run(question)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

View File

@@ -1,85 +0,0 @@
# Layerup Security
The [Layerup Security](https://uselayerup.com) integration allows you to secure your calls to any LangChain LLM, LLM chain or LLM agent. The LLM object wraps around any existing LLM object, allowing for a secure layer between your users and your LLMs.
While the Layerup Security object is designed as an LLM, it is not actually an LLM itself; it simply wraps around an LLM, allowing it to adopt the same functionality as the underlying LLM.
## Setup
First, you'll need a Layerup Security account from the Layerup [website](https://uselayerup.com).
Next, create a project via the [dashboard](https://dashboard.uselayerup.com), and copy your API key. We recommend putting your API key in your project's environment.
Install the Layerup Security SDK:
```bash
pip install LayerupSecurity
```
And install LangChain Community:
```bash
pip install langchain-community
```
And now you're ready to start protecting your LLM calls with Layerup Security!
```python
from datetime import datetime

from langchain_community.llms.layerup_security import LayerupSecurity
from langchain_openai import OpenAI
# Create an instance of your favorite LLM
openai = OpenAI(
model_name="gpt-3.5-turbo",
openai_api_key="OPENAI_API_KEY",
)
# Configure Layerup Security
layerup_security = LayerupSecurity(
# Specify a LLM that Layerup Security will wrap around
llm=openai,
# Layerup API key, from the Layerup dashboard
layerup_api_key="LAYERUP_API_KEY",
# Custom base URL, if self hosting
layerup_api_base_url="https://api.uselayerup.com/v1",
# List of guardrails to run on prompts before the LLM is invoked
prompt_guardrails=[],
# List of guardrails to run on responses from the LLM
response_guardrails=["layerup.hallucination"],
# Whether or not to mask the prompt for PII & sensitive data before it is sent to the LLM
mask=False,
# Metadata for abuse tracking, customer tracking, and scope tracking.
metadata={"customer": "example@uselayerup.com"},
# Handler for guardrail violations on the prompt guardrails
handle_prompt_guardrail_violation=(
lambda violation: {
"role": "assistant",
"content": (
"There was sensitive data! I cannot respond. "
"Here's a dynamic canned response. Current date: {}"
).format(datetime.now())
}
if violation["offending_guardrail"] == "layerup.sensitive_data"
else None
),
# Handler for guardrail violations on the response guardrails
handle_response_guardrail_violation=(
lambda violation: {
"role": "assistant",
"content": (
"Custom canned response with dynamic data! "
"The violation rule was {}."
).format(violation["offending_guardrail"])
}
),
)
response = layerup_security.invoke(
"Summarize this message: my name is Bob Dylan. My SSN is 123-45-6789."
)
```
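Because the wrapper behaves like any other LangChain LLM, it also composes with the rest of the framework. Here is a minimal sketch, assuming the standard Runnable interface that LangChain LLMs expose, of using the secured LLM behind a prompt template:
```python
from langchain_core.prompts import PromptTemplate

# A minimal sketch: the secured wrapper drops in wherever an LLM is expected.
prompt = PromptTemplate.from_template("Summarize this message: {message}")
chain = prompt | layerup_security

# The guardrails configured above still run on every call made through the chain.
print(chain.invoke({"message": "My name is Bob Dylan. My SSN is 123-45-6789."}))
```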

View File

@@ -1,85 +0,0 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# MoonshotChat\n",
"\n",
"[Moonshot](https://platform.moonshot.cn/) is a Chinese startup that provides LLM service for companies and individuals.\n",
"\n",
"This example goes over how to use LangChain to interact with Moonshot."
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.llms.moonshot import Moonshot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Generate your api key from: https://platform.moonshot.cn/console/api-keys\n",
"os.environ[\"MOONSHOT_API_KEY\"] = \"MOONSHOT_API_KEY\""
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"llm = Moonshot()\n",
"# or use a specific model\n",
"# Available models: https://platform.moonshot.cn/docs\n",
"# llm = Moonshot(model=\"moonshot-v1-128k\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"is_executing": true
}
},
"outputs": [],
"source": [
"# Prompt the model\n",
"llm.invoke(\"What is the difference between panda and bear?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,120 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2ff00e23-1a90-4a39-b220-83ebfffd96d6",
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-06T17:10:57.375714Z",
"iopub.status.busy": "2024-03-06T17:10:57.375261Z",
"iopub.status.idle": "2024-03-06T17:11:03.473978Z",
"shell.execute_reply": "2024-03-06T17:11:03.472875Z",
"shell.execute_reply.started": "2024-03-06T17:10:57.375670Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"\"Once upon a time, in a far-off land, there was a young girl named Lily. Lily was a kind and curious girl who loved to explore the world around her. One day, while wandering through the forest, she came across a small, shimmering pond.\\n\\nAs she approached the pond, she saw a beautiful, glowing flower floating on the water's surface. Lily reached out to touch the flower, and as she did, she felt a strange tingling sensation. Suddenly, the flower began to glow even brighter, and Lily was transported to a magical world filled with talking animals and enchanted forests.\\n\\nIn this world, Lily met a wise old owl named Winston who told her that the flower she had touched was a magical one that could grant her any wish she desired. Lily was overjoyed and asked Winston to show her around the magical world.\\n\\nTogether, they explored the enchanted forests, met friendly animals, and discovered hidden treasures. Lily was having the time of her life, but she knew that she couldn't stay in this magical world forever. Eventually, she had to return home.\\n\\nAs she said goodbye to Winston and the magical world, Lily realized that she had learned an important lesson. She had discovered that sometimes, the most magical things in life are the ones that are right in front of us, if we only take the time to look.\\n\\nFrom that day on, Lily always kept her eyes open for the magic in the world around her, and she never forgot the adventure she had in the enchanted forest.\""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"from langchain_community.llms.solar import Solar\n",
"\n",
"os.environ[\"SOLAR_API_KEY\"] = \"SOLAR_API_KEY\"\n",
"llm = Solar()\n",
"llm.invoke(\"tell me a story?\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "67fa1711-f08f-43fa-a3bd-75ae5bc6b988",
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-06T17:11:11.359924Z",
"iopub.status.busy": "2024-03-06T17:11:11.358357Z",
"iopub.status.idle": "2024-03-06T17:11:16.692138Z",
"shell.execute_reply": "2024-03-06T17:11:16.686492Z",
"shell.execute_reply.started": "2024-03-06T17:11:11.359835Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/ary/dev/llm/langchain/libs/core/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The function `run` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
" warn_deprecated(\n"
]
},
{
"data": {
"text/plain": [
"'Step 1: Determine the year Justin Bieber was born.\\nJustin Bieber was born on March 1, 1994.\\n\\nStep 2: Determine the Super Bowl held in 1994.\\nSuper Bowl XXVIII was held in 1994.\\n\\nStep 3: Determine the winning team of Super Bowl XXVIII.\\nThe Dallas Cowboys won Super Bowl XXVIII in 1994.\\n\\nFinal Answer: The Dallas Cowboys won the Super Bowl in the year Justin Bieber was born (1994).'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.chains import LLMChain\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain_community.llms.solar import Solar\n",
"\n",
"template = \"\"\"Question: {question}\n",
"\n",
"Answer: Let's think step by step.\"\"\"\n",
"\n",
"prompt = PromptTemplate.from_template(template)\n",
"\n",
"llm = Solar()\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"\n",
"question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n",
"\n",
"llm_chain.run(question)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "91961983-d0d5-4901-b854-531e158c0416",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -90,7 +90,7 @@ from langchain_community.embeddings import HuggingFaceInstructEmbeddings
#### HuggingFaceBgeEmbeddings
>[BGE models on the HuggingFace](https://huggingface.co/BAAI/bge-large-en) are [the best open-source embedding models](https://huggingface.co/spaces/mteb/leaderboard).
>BGE model is created by the [Beijing Academy of Artificial Intelligence (BAAI)](https://en.wikipedia.org/wiki/Beijing_Academy_of_Artificial_Intelligence). `BAAI` is a private non-profit organization engaged in AI research and development.
>BGE model is created by the [Beijing Academy of Artificial Intelligence (BAAI)](https://www.baai.ac.cn/english.html). `BAAI` is a private non-profit organization engaged in AI research and development.
See a [usage example](/docs/integrations/text_embedding/bge_huggingface).

View File

@@ -273,20 +273,19 @@ from langchain.retrievers import AzureCognitiveSearchRetriever
## Toolkits
### Azure AI Services
### Azure Cognitive Services
We need to install several Python packages.
```bash
pip install azure-ai-formrecognizer azure-cognitiveservices-speech azure-ai-vision-imageanalysis
pip install azure-ai-formrecognizer azure-cognitiveservices-speech azure-ai-vision
```
See a [usage example](/docs/integrations/toolkits/azure_ai_services).
See a [usage example](/docs/integrations/toolkits/azure_cognitive_services).
```python
from langchain_community.agent_toolkits import azure_ai_services
from langchain_community.agent_toolkits import O365Toolkit
```
### Microsoft Office 365 email and calendar
We need to install the `O365` Python package.

View File

@@ -32,11 +32,3 @@ See a [usage example](/docs/integrations/chat/anyscale).
```python
from langchain_community.chat_models.anyscale import ChatAnyscale
```
## Embeddings
See a [usage example](/docs/integrations/text_embedding/anyscale).
```python
from langchain_community.embeddings import AnyscaleEmbeddings
```

View File

@@ -5,7 +5,10 @@
## Installation and Setup
Get your API_KEY from [Neural Internet](https://neuralinternet.ai/).
Get your API_KEY from [Neural Internet](https://api.neuralinternet.ai).
You can [analyze API_KEYS](https://api.neuralinternet.ai/api-keys)
and [logs of your usage](https://api.neuralinternet.ai/logs).
## LLMs

View File

@@ -66,23 +66,23 @@ patch(langchain=True)
# patch(langchain=True, openai=True)patch_all
```
See the [APM Python library documentation](https://ddtrace.readthedocs.io/en/stable/installation_quickstart.html) for more advanced usage.
See the [APM Python library documentation][https://ddtrace.readthedocs.io/en/stable/installation_quickstart.html] for more advanced usage.
## Configuration
See the [APM Python library documentation](https://ddtrace.readthedocs.io/en/stable/integrations.html#langchain) for all the available configuration options.
See the [APM Python library documentation][https://ddtrace.readthedocs.io/en/stable/integrations.html#langchain] for all the available configuration options.
### Log Prompt & Completion Sampling
To enable log prompt and completion sampling, set the `DD_LANGCHAIN_LOGS_ENABLED=1` environment variable. By default, 10% of traced requests will emit logs containing the prompts and completions.
To adjust the log sample rate, see the [APM library documentation](https://ddtrace.readthedocs.io/en/stable/integrations.html#langchain).
To adjust the log sample rate, see the [APM library documentation][https://ddtrace.readthedocs.io/en/stable/integrations.html#langchain].
**Note**: Logs submission requires `DD_API_KEY` to be specified when running `ddtrace-run`.
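A minimal sketch of enabling this from Python before patching, assuming the environment variables documented above (the API key value is a placeholder):
```python
import os

# Enable prompt/completion log sampling before ddtrace patches LangChain.
# DD_LANGCHAIN_LOGS_ENABLED and DD_API_KEY are the variables documented above.
os.environ["DD_LANGCHAIN_LOGS_ENABLED"] = "1"
os.environ["DD_API_KEY"] = "<your-datadog-api-key>"

from ddtrace import patch

patch(langchain=True)
```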
## Troubleshooting
Need help? Create an issue on [ddtrace](https://github.com/DataDog/dd-trace-py) or contact [Datadog support](https://docs.datadoghq.com/help/).
Need help? Create an issue on [ddtrace](https://github.com/DataDog/dd-trace-py) or contact [Datadog support][https://docs.datadoghq.com/help/].

View File

@@ -14,7 +14,7 @@ The purpose of this notebook is to demonstrate the integration of a `FlyteCallba
## Flyte Tasks
A Flyte [task](https://docs.flyte.org/en/latest/user_guide/basics/tasks.html) serves as the foundational building block of Flyte.
A Flyte [task](https://docs.flyte.org/projects/cookbook/en/latest/auto/core/flyte_basics/task.html) serves as the foundational building block of Flyte.
To execute LangChain experiments, you need to write Flyte tasks that define the specific steps and operations involved.
NOTE: The [getting started guide](https://docs.flyte.org/projects/cookbook/en/latest/index.html) offers detailed, step-by-step instructions on installing Flyte locally and running your initial Flyte pipeline.
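For instance, a minimal sketch of a Flyte task wrapping a LangChain call might look like the following (the model name and prompt are illustrative, not from the original guide):
```python
from flytekit import task


@task
def generate(prompt: str) -> str:
    # Import inside the task so the dependency resolves in the task's image.
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(model="gpt-3.5-turbo")
    return llm.invoke(prompt).content
```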
@@ -46,9 +46,9 @@ os.environ["SERPAPI_API_KEY"] = "<your_serp_api_key>"
Replace `<your_openai_api_key>` and `<your_serp_api_key>` with your respective API keys obtained from OpenAI and Serp API.
To guarantee reproducibility of your pipelines, Flyte tasks are containerized.
Each Flyte task must be associated with an image, which can either be shared across the entire Flyte [workflow](https://docs.flyte.org/en/latest/user_guide/basics/workflows.html) or provided separately for each task.
Each Flyte task must be associated with an image, which can either be shared across the entire Flyte [workflow](https://docs.flyte.org/projects/cookbook/en/latest/auto/core/flyte_basics/basic_workflow.html) or provided separately for each task.
To streamline the process of supplying the required dependencies for each Flyte task, you can initialize an [`ImageSpec`](https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/imagespec.html) object.
To streamline the process of supplying the required dependencies for each Flyte task, you can initialize an [`ImageSpec`](https://docs.flyte.org/projects/cookbook/en/latest/auto/core/image_spec/image_spec.html) object.
This approach automatically triggers a Docker build, alleviating the need for users to manually create a Docker image.
```python

View File

@@ -16,7 +16,7 @@ With your LangChain environment you can just add the following parameter.
export OPENAI_API_BASE="https://oai.hconeai.com/v1"
```
Now head over to [helicone.ai](https://www.helicone.ai/signup) to create your account, and add your OpenAI API key within our dashboard to view your logs.
Now head over to [helicone.ai](https://helicone.ai/onboarding?step=2) to create your account, and add your OpenAI API key within our dashboard to view your logs.
![Interface for entering and managing OpenAI API keys in the Helicone dashboard.](/img/HeliconeKeys.png "Helicone API Key Input")
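A minimal sketch of the same setup from Python, assuming the `langchain_openai` package and the proxy URL shown above:
```python
import os

from langchain_openai import OpenAI

# Point the OpenAI client at the Helicone proxy; requests are then logged
# to your Helicone dashboard. Set the variable before constructing the LLM.
os.environ["OPENAI_API_BASE"] = "https://oai.hconeai.com/v1"

llm = OpenAI(temperature=0.9)
print(llm.invoke("What is a helicone?"))
```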

View File

@@ -35,7 +35,7 @@ llm = ChatOpenAI(model_name="gpt-3.5-turbo", callbacks=[log10_callback])
[Log10 + Langchain + Logs docs](https://github.com/log10-io/log10/blob/main/logging.md#langchain-logger)
[More details + screenshots](https://log10.io/docs/observability/logs) including instructions for self-hosting logs
[More details + screenshots](https://log10.io/docs/logs) including instructions for self-hosting logs
## How to use tags with Log10
@@ -99,6 +99,6 @@ with log10_session(tags=["foo", "bar"]):
## How to debug Langchain calls
[Example of debugging](https://log10.io/docs/observability/prompt_chain_debugging)
[Example of debugging](https://log10.io/docs/prompt_chain_debugging)
[More Langchain examples](https://github.com/log10-io/log10/tree/main/examples#langchain)

View File

@@ -51,7 +51,7 @@ mlflow deployments start-server --config-path /path/to/config.yaml
> This module exports multivariate LangChain models in the langchain flavor and univariate LangChain
> models in the pyfunc flavor.
See the [API documentation and examples](https://www.mlflow.org/docs/latest/llms/langchain/index.html) for more information.
See the [API documentation and examples](https://www.mlflow.org/docs/latest/python_api/mlflow.langchain) for more information.
## Completions Example
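A minimal sketch, assuming the deployments server started above is running locally and `config.yaml` defines an endpoint named `completions`:
```python
from langchain_community.llms import Mlflow

# Target the local deployments server and the completions endpoint.
llm = Mlflow(target_uri="http://127.0.0.1:5000", endpoint="completions")
print(llm.invoke("What is MLflow?"))
```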

View File

@@ -6,9 +6,10 @@ MLflow AI Gateway has been deprecated. Please use [MLflow Deployments for LLMs](
:::
>[The MLflow AI Gateway](https://www.mlflow.org/docs/latest/index.html) service is a powerful tool designed to streamline the usage and management of various large
>[The MLflow AI Gateway](https://www.mlflow.org/docs/latest/gateway/index) service is a powerful tool designed to streamline the usage and management of various large
> language model (LLM) providers, such as OpenAI and Anthropic, within an organization. It offers a high-level interface
> that simplifies the interaction with these services by providing a unified endpoint to handle specific LLM related requests.
> See [the MLflow AI Gateway documentation](https://mlflow.org/docs/latest/gateway/index) for more details.
## Installation and Setup
@@ -57,7 +58,7 @@ mlflow gateway start --config-path /path/to/config.yaml
> This module exports multivariate LangChain models in the langchain flavor and univariate LangChain
> models in the pyfunc flavor.
See the [API documentation and examples](https://www.mlflow.org/docs/latest/python_api/mlflow.langchain.html?highlight=langchain#module-mlflow.langchain).
See the [API documentation and examples](https://www.mlflow.org/docs/latest/python_api/mlflow.langchain).

View File

@@ -11,7 +11,7 @@ This page covers how to use the [Momento](https://gomomento.com) ecosystem withi
## Installation and Setup
- Sign up for a free account [here](https://console.gomomento.com/) to get an API key
- Sign up for a free account [here](https://console.momentohq.com) to get an API key
- Install the Momento Python SDK with `pip install momento`
## Cache

View File

@@ -8,7 +8,7 @@
Install the Python SDK:
```bash
pip install langchain-pinecone
pip install pinecone-client
```
@@ -22,26 +22,3 @@ from langchain_pinecone import PineconeVectorStore
```
For a more detailed walkthrough of the Pinecone vectorstore, see [this notebook](/docs/integrations/vectorstores/pinecone)
## Retrievers
### Pinecone Hybrid Search
```bash
pip install pinecone-client pinecone-text
```
```python
from langchain_community.retrievers import (
PineconeHybridSearchRetriever,
)
```
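A minimal sketch of wiring the retriever up, assuming an existing Pinecone index (the index name is hypothetical, and the `BM25Encoder` defaults are illustrative):
```python
import os

from langchain_community.retrievers import PineconeHybridSearchRetriever
from langchain_openai import OpenAIEmbeddings
from pinecone import Pinecone
from pinecone_text.sparse import BM25Encoder

# Connect to an existing index (hypothetical name) for hybrid search.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("langchain-hybrid-demo")

# Combine dense embeddings with a sparse BM25 encoder.
retriever = PineconeHybridSearchRetriever(
    embeddings=OpenAIEmbeddings(),
    sparse_encoder=BM25Encoder().default(),
    index=index,
)
docs = retriever.invoke("What is hybrid search?")
```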
For more detailed information, see [this notebook](/docs/integrations/retrievers/pinecone_hybrid_search).
### Self Query retriever
Pinecone vector store can be used as a retriever for self-querying.
For more detailed information, see [this notebook](/docs/integrations/retrievers/self_query/pinecone).

View File

@@ -1,13 +1,5 @@
---
sidebar_class_name: hidden
---
# Psychic
:::warning
This provider is no longer maintained, and may not work. Use with caution.
:::
>[Psychic](https://www.psychic.dev/) is a platform for integrating with SaaS tools like `Notion`, `Zendesk`,
> `Confluence`, and `Google Drive` via OAuth and syncing documents from these applications to your SQL or vector
> database. You can think of it like Plaid for unstructured data.

View File

@@ -7,6 +7,7 @@ It is broken into two parts: installation and setup, and then references to spec
- Get a [Nebula API Key](https://info.symbl.ai/Nebula_Private_Beta.html) and set it as the environment variable `NEBULA_API_KEY`
- Please see the [Nebula documentation](https://docs.symbl.ai/docs/nebula-llm) for more details.
- No time? Visit the [Nebula Quickstart Guide](https://docs.symbl.ai/docs/nebula-quickstart).
### LLM

View File

@@ -8,7 +8,7 @@ TruLens is an [open-source](https://github.com/truera/trulens) package that prov
## Quick start
Once you've created your LLM chain, you can use TruLens for evaluation and tracking. TruLens has a number of [out-of-the-box Feedback Functions](https://www.trulens.org/trulens_eval/evaluation/feedback_functions/), and is also an extensible framework for LLM evaluation.
Once you've created your LLM chain, you can use TruLens for evaluation and tracking. TruLens has a number of [out-of-the-box Feedback Functions](https://www.trulens.org/trulens_eval/feedback_functions/), and is also an extensible framework for LLM evaluation.
```python
# create a feedback function

View File

@@ -1,7 +1,7 @@
# Typesense
> [Typesense](https://typesense.org) is an open-source, in-memory search engine, that you can either
> [self-host](https://typesense.org/docs/guide/install-typesense.html#option-2-local-machine-self-hosting) or run
> [self-host](https://typesense.org/docs/guide/install-typesense#option-2-local-machine-self-hosting) or run
> on [Typesense Cloud](https://cloud.typesense.org/).
> `Typesense` focuses on performance by storing the entire index in RAM (with a backup on disk) and also
> focuses on providing an out-of-the-box developer experience by simplifying available options and setting good defaults.

View File

@@ -28,7 +28,7 @@ simply run `pip install unstructured` and use `UnstructuredAPIFileLoader` or
The Unstructured API requires API keys to make requests.
You can request an API key [here](https://unstructured.io/api-key-hosted) and start using it today!
You can generate a free API key [here](https://www.unstructured.io/api-key) and start using it today!
Check out the README [here](https://github.com/Unstructured-IO/unstructured-api) to get started making API calls.
We'd love to hear your feedback, let us know how it goes in our [community slack](https://join.slack.com/t/unstructuredw-kbe4326/shared_invite/zt-1x7cgo0pg-PTptXWylzPQF9xZolzCnwQ).
And stay tuned for improvements to both quality and performance!
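A minimal sketch of the hosted-API loader (the file path is illustrative; the API key comes from the signup link above):
```python
from langchain_community.document_loaders import UnstructuredAPIFileLoader

# Send a local file to the hosted Unstructured API for parsing.
loader = UnstructuredAPIFileLoader(
    file_path="example.pdf",
    api_key="<UNSTRUCTURED_API_KEY>",
)
docs = loader.load()
```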

View File

@@ -1,62 +0,0 @@
# VDMS
> [VDMS](https://github.com/IntelLabs/vdms/blob/master/README.md) is a storage solution for efficient access
> of big-"visual"-data that aims to achieve cloud scale by searching for relevant visual data via visual metadata
> stored as a graph and enabling machine friendly enhancements to visual data for faster access.
## Installation and Setup
### Install Client
```bash
pip install vdms
```
### Install Database
There are two ways to get started with VDMS:
#### Install VDMS on your local machine via docker
```bash
docker run -d -p 55555:55555 intellabs/vdms:latest
```
#### Install VDMS directly on your local machine
Please see [installation instructions](https://github.com/IntelLabs/vdms/blob/master/INSTALL.md).
## VectorStore
The vector store is a lightweight wrapper around VDMS that provides a simple interface for storing and retrieving data.
```python
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import VDMS
from langchain_community.vectorstores.vdms import VDMS_Client

# Load the source document and split it into chunks
loader = TextLoader("./state_of_the_union.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Connect to the VDMS server and index the chunks
client = VDMS_Client("localhost", 55555)
vectorstore = VDMS.from_documents(
    docs,
    client=client,
    collection_name="langchain-demo",
    embedding_function=HuggingFaceEmbeddings(),
    engine="FaissFlat",
    distance_strategy="L2",
)

query = "What did the president say about Ketanji Brown Jackson"
results = vectorstore.similarity_search(query)
```
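The store also plugs into retrieval pipelines. A minimal sketch, assuming the standard `VectorStore.as_retriever` interface:
```python
# Expose the vector store as a retriever for use in chains.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
relevant_docs = retriever.invoke(query)
```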
For a more detailed walkthrough of the VDMS wrapper, see [this notebook](/docs/integrations/vectorstores/vdms)

File diff suppressed because one or more lines are too long

View File

@@ -8,7 +8,7 @@
"# BGE on Hugging Face\n",
"\n",
">[BGE models on the HuggingFace](https://huggingface.co/BAAI/bge-large-en) are [the best open-source embedding models](https://huggingface.co/spaces/mteb/leaderboard).\n",
">BGE model is created by the [Beijing Academy of Artificial Intelligence (BAAI)](https://en.wikipedia.org/wiki/Beijing_Academy_of_Artificial_Intelligence). `BAAI` is a private non-profit organization engaged in AI research and development.\n",
">BGE model is created by the [Beijing Academy of Artificial Intelligence (BAAI)](https://www.baai.ac.cn/english.html). `BAAI` is a private non-profit organization engaged in AI research and development.\n",
"\n",
"This notebook shows how to use `BGE Embeddings` through `Hugging Face`"
]

View File

@@ -1,268 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ed47bb62",
"metadata": {},
"source": [
"# OpenVINO Local Pipelines\n",
"[OpenVINO™](https://github.com/openvinotoolkit/openvino) is an open-source toolkit for optimizing and deploying AI inference. The OpenVINO™ Runtime supports various hardware [devices](https://github.com/openvinotoolkit/openvino?tab=readme-ov-file#supported-hardware-matrix) including x86 and ARM CPUs, and Intel GPUs. It can help to boost deep learning performance in Computer Vision, Automatic Speech Recognition, Natural Language Processing and other common tasks.\n",
"\n",
"Hugging Face embedding model can be supported by OpenVINO through ``OpenVINOEmbeddings`` class. If you have an Intel GPU, you can specify `model_kwargs={\"device\": \"GPU\"}` to run inference on it."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "16b20335-da1d-46ba-aa23-fbf3e2c6fe60",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "861521a9",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.embeddings import OpenVINOEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ff9be586",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
" warnings.warn(\n",
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
}
],
"source": [
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"model_kwargs = {\"device\": \"CPU\"}\n",
"encode_kwargs = {\"mean_pooling\": True, \"normalize_embeddings\": True}\n",
"\n",
"ov_embeddings = OpenVINOEmbeddings(\n",
" model_name_or_path=model_name,\n",
" model_kwargs=model_kwargs,\n",
" encode_kwargs=encode_kwargs,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d0a98ae9",
"metadata": {},
"outputs": [],
"source": [
"text = \"This is a test document.\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5d6c682b",
"metadata": {},
"outputs": [],
"source": [
"query_result = ov_embeddings.embed_query(text)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b57b8ce9-ef7d-4e63-979e-aa8763d1f9a8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[-0.048951778560876846, -0.03986183926463127, -0.02156277745962143]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_result[:3]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "bb5e74c0",
"metadata": {},
"outputs": [],
"source": [
"doc_result = ov_embeddings.embed_documents([text])"
]
},
{
"cell_type": "markdown",
"id": "92019ef1-5d30-4985-b4e6-c0d98bdfe265",
"metadata": {},
"source": [
"## BGE with OpenVINO\n",
"We can also access BGE embedding models via the ``OpenVINOBgeEmbeddings`` class with OpenVINO. "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "66f5c6ba-1446-43e1-b012-800d17cef300",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
" warnings.warn(\n",
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"Overriding 1 configuration item(s)\n",
"\t- use_cache -> False\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
}
],
"source": [
"from langchain_community.embeddings import OpenVINOBgeEmbeddings\n",
"\n",
"model_name = \"BAAI/bge-small-en\"\n",
"model_kwargs = {\"device\": \"CPU\"}\n",
"encode_kwargs = {\"normalize_embeddings\": True}\n",
"ov_embeddings = OpenVINOBgeEmbeddings(\n",
" model_name_or_path=model_name,\n",
" model_kwargs=model_kwargs,\n",
" encode_kwargs=encode_kwargs,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "72001afb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"384"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"embedding = ov_embeddings.embed_query(\"hi this is harrison\")\n",
"len(embedding)"
]
},
{
"cell_type": "markdown",
"id": "7e86c9ae-ec63-48e9-97ba-f23f7a042ed1",
"metadata": {},
"source": [
"For more information refer to:\n",
"\n",
"* [OpenVINO LLM guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html).\n",
"\n",
"* [OpenVINO Documentation](https://docs.openvino.ai/2024/home.html).\n",
"\n",
"* [OpenVINO Get Started Guide](https://www.intel.com/content/www/us/en/content-details/819067/openvino-get-started-guide.html).\n",
"\n",
"* [RAG Notebook with LangChain](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llm-chatbot/rag-chatbot.ipynb)."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"vscode": {
"interpreter": {
"hash": "7377c2ccc78bc62c2683122d48c8cd1fb85a53850a1b1fc29736ed39852c9885"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large

View File

@@ -1,315 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Azure AI Services\n",
"\n",
"This toolkit is used to interact with the `Azure AI Services API` to achieve some multimodal capabilities.\n",
"\n",
"Currently There are five tools bundled in this toolkit:\n",
"- **AzureAiServicesImageAnalysisTool**: used to extract caption, objects, tags, and text from images.\n",
"- **AzureAiServicesDocumentIntelligenceTool**: used to extract text, tables, and key-value pairs from documents.\n",
"- **AzureAiServicesSpeechToTextTool**: used to transcribe speech to text.\n",
"- **AzureAiServicesTextToSpeechTool**: used to synthesize text to speech.\n",
"- **AzureAiServicesTextAnalyticsForHealthTool**: used to extract healthcare entities."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, you need to set up an Azure account and create an AI Services resource. You can follow the instructions [here](https://learn.microsoft.com/en-us/azure/ai-services/multi-service-resource) to create a resource. \n",
"\n",
"Then, you need to get the endpoint, key and region of your resource, and set them as environment variables. You can find them in the \"Keys and Endpoint\" page of your resource."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet azure-ai-formrecognizer > /dev/null\n",
"%pip install --upgrade --quiet azure-cognitiveservices-speech > /dev/null\n",
"%pip install --upgrade --quiet azure-ai-textanalytics > /dev/null\n",
"%pip install --upgrade --quiet azure-ai-vision-imageanalysis > /dev/null"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"sk-\"\n",
"os.environ[\"AZURE_AI_SERVICES_KEY\"] = \"\"\n",
"os.environ[\"AZURE_AI_SERVICES_ENDPOINT\"] = \"\"\n",
"os.environ[\"AZURE_AI_SERVICES_REGION\"] = \"\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create the Toolkit"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.agent_toolkits import AzureAiServicesToolkit\n",
"\n",
"toolkit = AzureAiServicesToolkit()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['azure_ai_services_document_intelligence',\n",
" 'azure_ai_services_image_analysis',\n",
" 'azure_ai_services_speech_to_text',\n",
" 'azure_ai_services_text_to_speech',\n",
" 'azure_ai_services_text_analytics_for_health']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[tool.name for tool in toolkit.get_tools()]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use within an Agent"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from langchain import hub\n",
"from langchain.agents import AgentExecutor, create_structured_chat_agent\n",
"from langchain_openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"llm = OpenAI(temperature=0)\n",
"tools = toolkit.get_tools()\n",
"prompt = hub.pull(\"hwchase17/structured-chat-agent\")\n",
"agent = create_structured_chat_agent(llm, tools, prompt)\n",
"\n",
"agent_executor = AgentExecutor(\n",
" agent=agent, tools=tools, verbose=True, handle_parsing_errors=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Thought: I need to use the azure_ai_services_image_analysis tool to analyze the image of the ingredients.\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"azure_ai_services_image_analysis\",\n",
" \"action_input\": \"https://images.openai.com/blob/9ad5a2ab-041f-475f-ad6a-b51899c50182/ingredients.png\"\n",
"}\n",
"```\n",
"\u001b[0m\u001b[33;1m\u001b[1;3mCaption: a group of eggs and flour in bowls\n",
"Objects: Egg, Egg, Food\n",
"Tags: dairy, ingredient, indoor, thickening agent, food, mixing bowl, powder, flour, egg, bowl\u001b[0m\u001b[32;1m\u001b[1;3m\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Final Answer\",\n",
" \"action_input\": \"You can make a cake or other baked goods with these ingredients.\"\n",
"}\n",
"```\n",
"\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': 'What can I make with these ingredients? https://images.openai.com/blob/9ad5a2ab-041f-475f-ad6a-b51899c50182/ingredients.png',\n",
" 'output': 'You can make a cake or other baked goods with these ingredients.'}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent_executor.invoke(\n",
" {\n",
" \"input\": \"What can I make with these ingredients? \"\n",
" + \"https://images.openai.com/blob/9ad5a2ab-041f-475f-ad6a-b51899c50182/ingredients.png\"\n",
" }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Thought: I can use the Azure AI Services Text to Speech API to convert text to speech.\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"azure_ai_services_text_to_speech\",\n",
" \"action_input\": \"Why don't scientists trust atoms? Because they make up everything.\"\n",
"}\n",
"```\n",
"\u001b[0m\u001b[36;1m\u001b[1;3m/tmp/tmpe48vamz0.wav\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
}
],
"source": [
"tts_result = agent_executor.invoke({\"input\": \"Tell me a joke and read it out for me.\"})\n",
"audio_file = tts_result.get(\"output\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython import display\n",
"\n",
"audio = display.Audio(data=audio_file, autoplay=True, rate=22050)\n",
"display.display(audio)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Thought: The patient has a history of progressive angina, a strong family history of coronary artery disease, and a previous cardiac catheterization revealing total occlusion of the RCA and 50% left main disease.\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"azure_ai_services_text_analytics_for_health\",\n",
" \"action_input\": \"The patient is a 54-year-old gentleman with a history of progressive angina over the past several months. The patient had a cardiac catheterization in July of this year revealing total occlusion of the RCA and 50% left main disease, with a strong family history of coronary artery disease with a brother dying at the age of 52 from a myocardial infarction and another brother who is status post coronary artery bypass grafting. The patient had a stress echocardiogram done on July, 2001, which showed no wall motion abnormalities, but this was a difficult study due to body habitus. The patient went for six minutes with minimal ST depressions in the anterior lateral leads, thought due to fatigue and wrist pain, his anginal equivalent. Due to the patient's increased symptoms and family history and history left main disease with total occasional of his RCA was referred for revascularization with open heart surgery.\"\n",
"\u001b[0m\u001b[33;1m\u001b[1;3mThe text contains the following healthcare entities: 54-year-old is a healthcare entity of type Age, gentleman is a healthcare entity of type Gender, progressive angina is a healthcare entity of type Diagnosis, past several months is a healthcare entity of type Time, cardiac catheterization is a healthcare entity of type ExaminationName, July of this year is a healthcare entity of type Time, total is a healthcare entity of type ConditionQualifier, occlusion is a healthcare entity of type SymptomOrSign, RCA is a healthcare entity of type BodyStructure, 50 is a healthcare entity of type MeasurementValue, % is a healthcare entity of type MeasurementUnit, left main disease is a healthcare entity of type Diagnosis, family is a healthcare entity of type FamilyRelation, coronary artery disease is a healthcare entity of type Diagnosis, brother is a healthcare entity of type FamilyRelation, dying is a healthcare entity of type Diagnosis, 52 is a healthcare entity of type Age, myocardial infarction is a healthcare entity of type Diagnosis, brother is a healthcare entity of type FamilyRelation, coronary artery bypass grafting is a healthcare entity of type TreatmentName, stress echocardiogram is a healthcare entity of type ExaminationName, July, 2001 is a healthcare entity of type Time, wall motion abnormalities is a healthcare entity of type SymptomOrSign, body habitus is a healthcare entity of type SymptomOrSign, six minutes is a healthcare entity of type Time, minimal is a healthcare entity of type ConditionQualifier, ST depressions in the anterior lateral leads is a healthcare entity of type SymptomOrSign, fatigue is a healthcare entity of type SymptomOrSign, wrist pain is a healthcare entity of type SymptomOrSign, anginal is a healthcare entity of type SymptomOrSign, increased is a healthcare entity of type Course, symptoms is a healthcare entity of type SymptomOrSign, family is a healthcare entity of type FamilyRelation, left main disease is a healthcare entity of type Diagnosis, occasional is a healthcare entity of type Course, RCA is a healthcare entity of type BodyStructure, revascularization is a healthcare entity of type TreatmentName, open heart surgery is a healthcare entity of type TreatmentName\u001b[0m\u001b[32;1m\u001b[1;3m\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Final Answer\",\n",
" \"action_input\": \"The patient's diagnoses include progressive angina, total occlusion of the RCA, 50% left main disease, coronary artery disease, myocardial infarction, and a family history of coronary artery disease.\"\n",
"}\n",
"\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': \"\\nThe patient is a 54-year-old gentleman with a history of progressive angina over the past several months.\\nThe patient had a cardiac catheterization in July of this year revealing total occlusion of the RCA and 50% left main disease ,\\nwith a strong family history of coronary artery disease with a brother dying at the age of 52 from a myocardial infarction and\\nanother brother who is status post coronary artery bypass grafting. The patient had a stress echocardiogram done on July , 2001 ,\\nwhich showed no wall motion abnormalities , but this was a difficult study due to body habitus. The patient went for six minutes with\\nminimal ST depressions in the anterior lateral leads , thought due to fatigue and wrist pain , his anginal equivalent. Due to the patient's\\nincreased symptoms and family history and history left main disease with total occasional of his RCA was referred for revascularization with open heart surgery.\\n\\nList all the diagnoses.\\n\",\n",
" 'output': \"The patient's diagnoses include progressive angina, total occlusion of the RCA, 50% left main disease, coronary artery disease, myocardial infarction, and a family history of coronary artery disease.\"}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_input = \"\"\"\n",
"The patient is a 54-year-old gentleman with a history of progressive angina over the past several months.\n",
"The patient had a cardiac catheterization in July of this year revealing total occlusion of the RCA and 50% left main disease ,\n",
"with a strong family history of coronary artery disease with a brother dying at the age of 52 from a myocardial infarction and\n",
"another brother who is status post coronary artery bypass grafting. The patient had a stress echocardiogram done on July , 2001 ,\n",
"which showed no wall motion abnormalities , but this was a difficult study due to body habitus. The patient went for six minutes with\n",
"minimal ST depressions in the anterior lateral leads , thought due to fatigue and wrist pain , his anginal equivalent. Due to the patient's\n",
"increased symptoms and family history and history left main disease with total occasional of his RCA was referred for revascularization with open heart surgery.\n",
"\n",
"List all the diagnoses.\n",
"\"\"\"\n",
"\n",
"agent_executor.invoke({\"input\": sample_input})"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -21,7 +21,15 @@
"metadata": {
"id": "34bb5968"
},
"outputs": [],
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
" ········\n"
]
}
],
"source": [
"import getpass\n",
"import os\n",
@@ -48,172 +56,14 @@
"metadata": {
"id": "84b8f773"
},
"outputs": [
{
"data": {
"text/plain": [
"{'Realtime Currency Exchange Rate': {'1. From_Currency Code': 'USD',\n",
" '2. From_Currency Name': 'United States Dollar',\n",
" '3. To_Currency Code': 'JPY',\n",
" '4. To_Currency Name': 'Japanese Yen',\n",
" '5. Exchange Rate': '148.19900000',\n",
" '6. Last Refreshed': '2023-11-30 21:43:02',\n",
" '7. Time Zone': 'UTC',\n",
" '8. Bid Price': '148.19590000',\n",
" '9. Ask Price': '148.20420000'}}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"alpha_vantage = AlphaVantageAPIWrapper()\n",
"alpha_vantage._get_exchange_rate(\"USD\", \"JPY\")"
]
},
{
"cell_type": "markdown",
"id": "8309d09d",
"metadata": {},
"source": [
"The `_get_time_series_daily` method returns the date, daily open, daily high, daily low, daily close, and daily volume of the global equity specified, covering the 100 latest data points."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "811ae207",
"metadata": {},
"outputs": [],
"source": [
"alpha_vantage._get_time_series_daily(\"IBM\")"
]
},
{
"cell_type": "markdown",
"id": "b5e46a71",
"metadata": {},
"source": [
"The `_get_time_series_weekly` method returns the last trading day of the week, weekly open, weekly high, weekly low, weekly close, and weekly volume of the global equity specified, covering 20+ years of historical data."
"alpha_vantage = AlphaVantageAPIWrapper()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0dfe35b",
"metadata": {},
"outputs": [],
"source": [
"alpha_vantage._get_time_series_weekly(\"IBM\")"
]
},
{
"cell_type": "markdown",
"id": "66cc06a7",
"metadata": {},
"source": [
"The `_get_quote_endpoint` method is a lightweight alternative to the time series APIs and returns the latest price and volume info for the specified symbol."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "98d012ef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'Global Quote': {'01. symbol': 'IBM',\n",
" '02. open': '156.9000',\n",
" '03. high': '158.6000',\n",
" '04. low': '156.8900',\n",
" '05. price': '158.5400',\n",
" '06. volume': '6640217',\n",
" '07. latest trading day': '2023-11-30',\n",
" '08. previous close': '156.4100',\n",
" '09. change': '2.1300',\n",
" '10. change percent': '1.3618%'}}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"alpha_vantage._get_quote_endpoint(\"IBM\")"
]
},
{
"cell_type": "markdown",
"id": "3429ce50",
"metadata": {},
"source": [
"The `search_symbol` method returns a list of symbols and the matching company information based on the text entered."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0ccd55b0",
"metadata": {},
"outputs": [],
"source": [
"alpha_vantage.search_symbols(\"IB\")"
]
},
{
"cell_type": "markdown",
"id": "96e1fd97",
"metadata": {},
"source": [
"The `_get_market_news_sentiment` method returns live and historical market news sentiment for a given asset."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "42995acb",
"metadata": {},
"outputs": [],
"source": [
"alpha_vantage._get_market_news_sentiment(\"IBM\")"
]
},
{
"cell_type": "markdown",
"id": "2fdbd888",
"metadata": {},
"source": [
"The `_get_top_gainers_losers` method returns the top 20 gainers, losers and most active stocks in the US market."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61c3cb1c",
"metadata": {},
"outputs": [],
"source": [
"alpha_vantage._get_top_gainers_losers()"
]
},
{
"cell_type": "markdown",
"id": "3d1cf3d8",
"metadata": {},
"source": [
"The `run` method of the wrapper takes the following parameters: from_currency, to_currency. \n",
"\n",
"It Gets the currency exchange rates for the given currency pair."
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"id": "068991a6",
"metadata": {
"id": "068991a6",
@@ -227,14 +77,14 @@
" '2. From_Currency Name': 'United States Dollar',\n",
" '3. To_Currency Code': 'JPY',\n",
" '4. To_Currency Name': 'Japanese Yen',\n",
" '5. Exchange Rate': '148.19900000',\n",
" '6. Last Refreshed': '2023-11-30 21:43:02',\n",
" '5. Exchange Rate': '144.93000000',\n",
" '6. Last Refreshed': '2023-08-11 21:31:01',\n",
" '7. Time Zone': 'UTC',\n",
" '8. Bid Price': '148.19590000',\n",
" '9. Ask Price': '148.20420000'}"
" '8. Bid Price': '144.92600000',\n",
" '9. Ask Price': '144.93400000'}"
]
},
"execution_count": 9,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -242,6 +92,14 @@
"source": [
"alpha_vantage.run(\"USD\", \"JPY\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84fc2b66-c08f-4cd3-ae13-494c54789c09",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

View File

@@ -1,176 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Infobip\n",
"This notebook that shows how to use [Infobip](https://www.infobip.com/) API wrapper to send SMS messages, emails.\n",
"\n",
"Infobip provides many services, but this notebook will focus on SMS and Email services. You can find more information about the API and other channels [here](https://www.infobip.com/docs/api)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"To use this tool you need to have an Infobip account. You can create [free trial account](https://www.infobip.com/docs/essentials/free-trial).\n",
"\n",
"\n",
"`InfobipAPIWrapper` uses name parameters where you can provide credentials:\n",
"\n",
"- `infobip_api_key` - [API Key](https://www.infobip.com/docs/essentials/api-authentication#api-key-header) that you can find in your [developer tools](https://portal.infobip.com/dev/api-keys)\n",
"- `infobip_base_url` - [Base url](https://www.infobip.com/docs/essentials/base-url) for Infobip API. You can use default value `https://api.infobip.com/`.\n",
"\n",
"You can also provide `infobip_api_key` and `infobip_base_url` as environment variables `INFOBIP_API_KEY` and `INFOBIP_BASE_URL`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sending a SMS"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_community.utilities.infobip import InfobipAPIWrapper\n",
"\n",
"infobip: InfobipAPIWrapper = InfobipAPIWrapper()\n",
"\n",
"infobip.run(\n",
" to=\"41793026727\",\n",
" text=\"Hello, World!\",\n",
" sender=\"Langchain\",\n",
" channel=\"sms\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sending a Email"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_community.utilities.infobip import InfobipAPIWrapper\n",
"\n",
"infobip: InfobipAPIWrapper = InfobipAPIWrapper()\n",
"\n",
"infobip.run(\n",
" to=\"test@example.com\",\n",
" sender=\"test@example.com\",\n",
" subject=\"example\",\n",
" body=\"example\",\n",
" channel=\"email\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# How to use it inside an Agent "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain import hub\n",
"from langchain.agents import AgentExecutor, create_openai_functions_agent\n",
"from langchain.tools import StructuredTool\n",
"from langchain_community.utilities.infobip import InfobipAPIWrapper\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"instructions = \"You are a coding teacher. You are teaching a student how to code. The student asks you a question. You answer the question.\"\n",
"base_prompt = hub.pull(\"langchain-ai/openai-functions-template\")\n",
"prompt = base_prompt.partial(instructions=instructions)\n",
"llm = ChatOpenAI(temperature=0)\n",
"\n",
"\n",
"class EmailInput(BaseModel):\n",
" body: str = Field(description=\"Email body text\")\n",
" to: str = Field(description=\"Email address to send to. Example: email@example.com\")\n",
" sender: str = Field(\n",
" description=\"Email address to send from, must be 'validemail@example.com'\"\n",
" )\n",
" subject: str = Field(description=\"Email subject\")\n",
" channel: str = Field(description=\"Email channel, must be 'email'\")\n",
"\n",
"\n",
"infobip_api_wrapper: InfobipAPIWrapper = InfobipAPIWrapper()\n",
"infobip_tool = StructuredTool.from_function(\n",
" name=\"infobip_email\",\n",
" description=\"Send Email via Infobip. If you need to send email, use infobip_email\",\n",
" func=infobip_api_wrapper.run,\n",
" args_schema=EmailInput,\n",
")\n",
"tools = [infobip_tool]\n",
"\n",
"agent = create_openai_functions_agent(llm, tools, prompt)\n",
"agent_executor = AgentExecutor(\n",
" agent=agent,\n",
" tools=tools,\n",
" verbose=True,\n",
")\n",
"\n",
"agent_executor.invoke(\n",
" {\n",
" \"input\": \"Hi, can you please send me an example of Python recursion to my email email@example.com\"\n",
" }\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```bash\n",
"> Entering new AgentExecutor chain...\n",
"\n",
"Invoking: `infobip_email` with `{'body': 'Hi,\\n\\nHere is a simple example of a recursive function in Python:\\n\\n```\\ndef factorial(n):\\n if n == 1:\\n return 1\\n else:\\n return n * factorial(n-1)\\n```\\n\\nThis function calculates the factorial of a number. The factorial of a number is the product of all positive integers less than or equal to that number. The function calls itself with a smaller argument until it reaches the base case where n equals 1.\\n\\nBest,\\nCoding Teacher', 'to': 'email@example.com', 'sender': 'validemail@example.com', 'subject': 'Python Recursion Example', 'channel': 'email'}`\n",
"\n",
"\n",
"I have sent an example of Python recursion to your email. Please check your inbox.\n",
"\n",
"> Finished chain.\n",
"```"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
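One detail the notebook above leaves implicit: the credentials can be passed as the named parameters described in the Setup section instead of environment variables. A minimal sketch (the key value is a hypothetical placeholder):

```python
from langchain_community.utilities.infobip import InfobipAPIWrapper

# Credentials passed explicitly via the named parameters from the Setup section.
infobip = InfobipAPIWrapper(
    infobip_api_key="YOUR_API_KEY",  # hypothetical placeholder
    infobip_base_url="https://api.infobip.com/",  # default base URL
)

infobip.run(
    to="41793026727",
    text="Hello, World!",
    sender="Langchain",
    channel="sms",
)
```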

File diff suppressed because one or more lines are too long

View File

@@ -1,191 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pathway\n",
"> [Pathway](https://pathway.com/) is an open data processing framework. It allows you to easily develop data transformation pipelines and Machine Learning applications that work with live data sources and changing data.\n",
"\n",
"This notebook demonstrates how to use a live `Pathway` data indexing pipeline with `Langchain`. You can query the results of this pipeline from your chains in the same manner as you would a regular vector store. However, under the hood, Pathway updates the index on each data change giving you always up-to-date answers.\n",
"\n",
"In this notebook, we will use a [public demo document processing pipeline](https://pathway.com/solutions/ai-pipelines#try-it-out) that:\n",
"\n",
"1. Monitors several cloud data sources for data changes.\n",
"2. Builds a vector index for the data.\n",
"\n",
"To have your own document processing pipeline check the [hosted offering](https://pathway.com/solutions/ai-pipelines) or [build your own](https://pathway.com/developers/user-guide/llm-xpack/vectorstore_pipeline/).\n",
"\n",
"We will connect to the index using a `VectorStore` client, which implements the `similarity_search` function to retrieve matching documents.\n",
"\n",
"The basic pipeline used in this document allows to effortlessly build a simple vector index of files stored in a cloud location. However, Pathway provides everything needed to build realtime data pipelines and apps, including SQL-like able operations such as groupby-reductions and joins between disparate data sources, time-based grouping and windowing of data, and a wide array of connectors.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Querying the data pipeline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To instantiate and configure the client you need to provide either the `url` or the `host` and `port` of your document indexing pipeline. In the code below we use a publicly available [demo pipeline](https://pathway.com/solutions/ai-pipelines#try-it-out), which REST API you can access at `https://demo-document-indexing.pathway.stream`. This demo ingests documents from [Google Drive](https://drive.google.com/drive/u/0/folders/1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs) and [Sharepoint](https://navalgo.sharepoint.com/sites/ConnectorSandbox/Shared%20Documents/Forms/AllItems.aspx?id=%2Fsites%2FConnectorSandbox%2FShared%20Documents%2FIndexerSandbox&p=true&ga=1) and maintains an index for retrieving documents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.vectorstores import PathwayVectorClient\n",
"\n",
"client = PathwayVectorClient(url=\"https://demo-document-indexing.pathway.stream\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" And we can start asking queries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query = \"What is Pathway?\"\n",
"docs = client.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(docs[0].page_content)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" **Your turn!** [Get your pipeline](https://pathway.com/solutions/ai-pipelines) or upload [new documents](https://chat-realtime-sharepoint-gdrive.demo.pathway.com/) to the demo pipeline and retry the query!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filtering based on file metadata\n",
"\n",
"We support document filtering using [jmespath](https://jmespath.org/) expressions, for instance:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# take into account only sources modified later than unix timestamp\n",
"docs = client.similarity_search(query, metadata_filter=\"modified_at >= `1702672093`\")\n",
"\n",
"# take into account only sources modified later than unix timestamp\n",
"docs = client.similarity_search(query, metadata_filter=\"owner == `james`\")\n",
"\n",
"# take into account only sources with path containing 'repo_readme'\n",
"docs = client.similarity_search(query, metadata_filter=\"contains(path, 'repo_readme')\")\n",
"\n",
"# and of two conditions\n",
"docs = client.similarity_search(\n",
" query, metadata_filter=\"owner == `james` && modified_at >= `1702672093`\"\n",
")\n",
"\n",
"# or of two conditions\n",
"docs = client.similarity_search(\n",
" query, metadata_filter=\"owner == `james` || modified_at >= `1702672093`\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Getting information on indexed files"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`PathwayVectorClient.get_vectorstore_statistics()` gives essential statistics on the state of the vector store, like the number of indexed files and the timestamp of the most recently updated one. You can use it in your chains to tell the user how fresh your knowledge base is."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client.get_vectorstore_statistics()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Your own pipeline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Running in production\n",
"To have your own Pathway data indexing pipeline check the Pathway's offer for [hosted pipelines](https://pathway.com/solutions/ai-pipelines). You can also run your own Pathway pipeline - for information on how to build the pipeline refer to [Pathway guide](https://pathway.com/developers/user-guide/llm-xpack/vectorstore_pipeline/)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Processing documents\n",
"\n",
"The vectorization pipeline supports pluggable components for parsing, splitting and embedding documents. For embedding and splitting you can use [Langchain components](https://pathway.com/developers/user-guide/llm-xpack/vectorstore_pipeline/#langchain) or check [embedders](https://pathway.com/developers/api-docs/pathway-xpacks-llm/embedders) and [splitters](https://pathway.com/developers/api-docs/pathway-xpacks-llm/splitters) available in Pathway. If parser is not provided, it defaults to `UTF-8` parser. You can find available parsers [here](https://github.com/pathwaycom/pathway/blob/main/python/pathway/xpacks/llm/parser.py)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
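As a recap of the notebook above, a minimal end-to-end sketch against the demo pipeline, combining the client setup, a filtered query, and the freshness statistics (the `metadata_filter` value is illustrative):

```python
from langchain_community.vectorstores import PathwayVectorClient

# The text above notes you can pass either `url` or `host` and `port`.
client = PathwayVectorClient(url="https://demo-document-indexing.pathway.stream")

docs = client.similarity_search(
    "What is Pathway?",
    metadata_filter="contains(path, 'repo_readme')",  # optional jmespath filter
)
print(docs[0].page_content if docs else "no matches")

# Freshness info for the knowledge base.
print(client.get_vectorstore_statistics())
```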

File diff suppressed because it is too large

View File

@@ -163,7 +163,7 @@
"id": "db6b9cbf-dd54-4346-be6c-842e08756ccc",
"metadata": {},
"source": [
":::{.callout-tip}\n",
":::tip\n",
"\n",
"[LangSmith trace](https://smith.langchain.com/public/6750972b-0849-4beb-a8bb-353d424ffade/r)\n",
"\n",

View File

@@ -21,7 +21,7 @@
"\n",
"OpenAI termed the capability to invoke a **single** function as **functions**, and the capability to invoke **one or more** functions as **tools**.\n",
"\n",
":::{.callout-tip}\n",
":::tip\n",
"\n",
"In the OpenAI Chat API, **functions** are now considered a legacy options that is deprecated in favor of **tools**.\n",
"\n",

View File

@@ -19,7 +19,7 @@
"\n",
"Some language models (like Anthropic's Claude) are particularly good at reasoning/writing XML. This goes over how to use an agent that uses XML when prompting. \n",
"\n",
":::{.callout-tip}\n",
":::tip\n",
"\n",
"* Use with regular LLMs, not with chat models.\n",
"* Use only with unstructured tools; i.e., tools that accept a single string input.\n",

View File

@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 1,
"id": "a9e37aa1",
"metadata": {},
"outputs": [],
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "e21a2434",
"metadata": {},
"outputs": [
@@ -61,14 +61,10 @@
" 'html',\n",
" 'sol',\n",
" 'csharp',\n",
" 'cobol',\n",
" 'c',\n",
" 'lua',\n",
" 'perl',\n",
" 'haskell']"
" 'cobol']"
]
},
"execution_count": 5,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -568,50 +564,13 @@
"c_docs"
]
},
{
"cell_type": "markdown",
"id": "af9de667-230e-4c2a-8c5f-122a28515d97",
"metadata": {},
"source": [
"## Haskell\n",
"Here's an example using the Haskell text splitter:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "688185b5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='main :: IO ()'),\n",
" Document(page_content='main = do\\n putStrLn \"Hello, World!\"\\n-- Some'),\n",
" Document(page_content='sample functions\\nadd :: Int -> Int -> Int\\nadd x y'),\n",
" Document(page_content='= x + y')]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"HASKELL_CODE = \"\"\"\n",
"main :: IO ()\n",
"main = do\n",
" putStrLn \"Hello, World!\"\n",
"-- Some sample functions\n",
"add :: Int -> Int -> Int\n",
"add x y = x + y\n",
"\"\"\"\n",
"haskell_splitter = RecursiveCharacterTextSplitter.from_language(\n",
" language=Language.HASKELL, chunk_size=50, chunk_overlap=0\n",
")\n",
"haskell_docs = haskell_splitter.create_documents([HASKELL_CODE])\n",
"haskell_docs"
]
"outputs": [],
"source": []
}
],
"metadata": {

View File

@@ -43,7 +43,7 @@ LangChain offers many different types of text splitters. These all live in the `
| Code | Code (Python, JS) specific characters | | Splits text based on characters specific to coding languages. 15 different languages are available to choose from. |
| Token | Tokens | | Splits text on tokens. There exist a few different ways to measure tokens. |
| Character | A user defined character | | Splits text based on a user defined character. One of the simpler methods. |
| [Experimental] Semantic Chunker | Sentences | | First splits on sentences. Then combines ones next to each other if they are semantically similar enough. Taken from [Greg Kamradt](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/tutorials/LevelsOfTextSplitting/5_Levels_Of_Text_Splitting.ipynb) |
| [Experimental] Semantic Chunker | Sentences | | First splits on sentences. Then combines ones next to each other if they are semantically similar enough. Taken from [Greg Kamradt](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/5_Levels_Of_Text_Splitting.ipynb) |
| [AI21 Semantic Text Splitter](/docs/integrations/document_transformers/ai21_semantic_text_splitter) | Semantics | ✅ | Identifies distinct topics that form coherent pieces of text and splits along those. |
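To make the table concrete, here is a minimal sketch of the "Character" row, splitting on a user-defined separator (all values illustrative):

```python
from langchain_text_splitters import CharacterTextSplitter

text = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."

# Split on blank lines, merging adjacent pieces up to the chunk size.
splitter = CharacterTextSplitter(separator="\n\n", chunk_size=40, chunk_overlap=0)
print(splitter.split_text(text))
```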

View File

@@ -60,7 +60,7 @@
" * document addition by id (`add_documents` method with `ids` argument)\n",
" * delete by id (`delete` method with `ids` argument)\n",
"\n",
"Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `VDMS`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n",
"Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n",
" \n",
"## Caution\n",
"\n",

View File

@@ -12,33 +12,6 @@
"It can speed up your application by reducing the number of API calls you make to the LLM provider.\n"
]
},
{
"cell_type": "markdown",
"id": "289b31de",
"metadata": {},
"source": [
"```{=mdx}\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
"\n",
"<ChatModelTabs customVarName=\"llm\" />\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6641f37",
"metadata": {},
"outputs": [],
"source": [
"# | output: false\n",
"# | echo: false\n",
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 1,
@@ -46,8 +19,10 @@
"metadata": {},
"outputs": [],
"source": [
"# <!-- ruff: noqa: F821 -->\n",
"from langchain.globals import set_llm_cache"
"from langchain.globals import set_llm_cache\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI()"
]
},
{
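For context on the cells above, a minimal sketch of actually enabling a cache, assuming an in-memory cache is acceptable (persistent backends such as SQLite are also available):

```python
from langchain.globals import set_llm_cache
from langchain_community.cache import InMemoryCache
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()
set_llm_cache(InMemoryCache())

llm.invoke("Tell me a joke")  # first call goes to the API
llm.invoke("Tell me a joke")  # identical repeat is served from the cache
```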

View File

@@ -18,13 +18,13 @@ structured outputs from models more generally.
LangChain comes with a number of utilities to make function-calling
easy. Namely, it comes with:
- simple syntax for binding functions to models
- converters for formatting various types of objects to the expected
function schemas
- output parsers for extracting the function invocations from API
responses
- chains for getting structured outputs from a model, built on top of
function calling
- simple syntax for binding functions to models
- converters for formatting various types of objects to the expected
function schemas
- output parsers for extracting the function invocations from API
responses
- chains for getting structured outputs from a model, built on top of
function calling
We'll focus here on the first two points. For a detailed guide on output
parsing check out the [OpenAI Tools output
@@ -38,6 +38,7 @@ Before getting started make sure you have `langchain-core` installed.
%pip install -qU langchain-core langchain-openai
```
```python
import getpass
import os
@@ -63,26 +64,38 @@ class Multiply(BaseModel):
b: int = Field(..., description="Second integer")
```
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import ChatModelTabs from "@theme/ChatModelTabs";
<Tabs>
<TabItem value="openai" label="OpenAI" default>
<ChatModelTabs
customVarName="llm"
fireworksParams={`model="accounts/fireworks/models/firefunction-v1", temperature=0`}
/>
We can use the `bind_tools()` method to handle converting
`Multiply` to a "function" and binding it to the model (i.e.,
passing it in each time the model is invoked).
Set up dependencies and API keys:
```python
%pip install -qU langchain-openai
```
```python
os.environ["OPENAI_API_KEY"] = getpass.getpass()
```
We can use the `ChatOpenAI.bind_tools()` method to handle converting
`Multiply` to an OpenAI function and binding it to the model (i.e.,
passing it in each time the model is invoked).
```python
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
llm_with_tools = llm.bind_tools([Multiply])
llm_with_tools.invoke("what's 3 * 12")
```
```text
``` text
AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Q8ZQ97Qrj5zalugSkYMGV1Uo', 'function': {'arguments': '{"a":3,"b":12}', 'name': 'Multiply'}, 'type': 'function'}]})
```
@@ -96,7 +109,7 @@ tool_chain = llm_with_tools | JsonOutputToolsParser()
tool_chain.invoke("what's 3 * 12")
```
```text
``` text
[{'type': 'Multiply', 'args': {'a': 3, 'b': 12}}]
```
@@ -109,10 +122,57 @@ tool_chain = llm_with_tools | PydanticToolsParser(tools=[Multiply])
tool_chain.invoke("what's 3 * 12")
```
```text
``` text
[Multiply(a=3, b=12)]
```
If we want to force a tool to be used (and to be used only
once), we can set the `tool_choice` argument:
```python
llm_with_multiply = llm.bind_tools([Multiply], tool_choice="Multiply")
llm_with_multiply.invoke(
"make up some numbers if you really want but I'm not forcing you"
)
```
``` text
AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_f3DApOzb60iYjTfOhVFhDRMI', 'function': {'arguments': '{"a":5,"b":10}', 'name': 'Multiply'}, 'type': 'function'}]})
```
For more see the [ChatOpenAI API
reference](https://api.python.langchain.com/en/latest/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html#langchain_openai.chat_models.base.ChatOpenAI.bind_tools).
</TabItem>
<TabItem value="fireworks" label="Fireworks">
Install dependencies and set API keys:
```python
%pip install -qU langchain-fireworks
```
```python
os.environ["FIREWORKS_API_KEY"] = getpass.getpass()
```
We can use the `ChatFireworks.bind_tools()` method to handle converting
`Multiply` to a valid function schema and binding it to the model (i.e.,
passing it in each time the model is invoked).
```python
from langchain_fireworks import ChatFireworks
llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0)
llm_with_tools = llm.bind_tools([Multiply])
llm_with_tools.invoke("what's 3 * 12")
```
``` text
AIMessage(content='Three multiplied by twelve is 36.')
```
If our model isn't using the tool, as is the case here, we can force
tool usage by specifying `tool_choice="any"` or by specifying the name
of the specific tool we want used:
@@ -122,12 +182,175 @@ llm_with_tools = llm.bind_tools([Multiply], tool_choice="Multiply")
llm_with_tools.invoke("what's 3 * 12")
```
```text
``` text
AIMessage(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_qIP2bJugb67LGvc6Zhwkvfqc', 'type': 'function', 'function': {'name': 'Multiply', 'arguments': '{"a": 3, "b": 12}'}}]})
```
We can add a tool parser to extract the tool calls from the generated
message to JSON:
```python
from langchain_core.output_parsers.openai_tools import JsonOutputToolsParser
tool_chain = llm_with_tools | JsonOutputToolsParser()
tool_chain.invoke("what's 3 * 12")
```
``` text
[{'type': 'Multiply', 'args': {'a': 3, 'b': 12}}]
```
Or back to the original Pydantic class:
```python
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
tool_chain = llm_with_tools | PydanticToolsParser(tools=[Multiply])
tool_chain.invoke("what's 3 * 12")
```
``` text
[Multiply(a=3, b=12)]
```
For more see the [ChatFireworks](https://api.python.langchain.com/en/latest/chat_models/langchain_fireworks.chat_models.ChatFireworks.html#langchain_fireworks.chat_models.ChatFireworks.bind_tools) reference.
</TabItem>
<TabItem value="mistral" label="Mistral">
Install dependencies and set API keys:
```python
%pip install -qU langchain-mistralai
```
```python
os.environ["MISTRAL_API_KEY"] = getpass.getpass()
```
We can use the `ChatMistralAI.bind_tools()` method to handle converting
`Multiply` to a valid function schema and binding it to the model (i.e.,
passing it in each time the model is invoked).
```python
from langchain_mistralai import ChatMistralAI
llm = ChatMistralAI(model="mistral-large-latest", temperature=0)
llm_with_tools = llm.bind_tools([Multiply])
llm_with_tools.invoke("what's 3 * 12")
```
``` text
AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'null', 'type': <ToolType.function: 'function'>, 'function': {'name': 'Multiply', 'arguments': '{"a": 3, "b": 12}'}}]})
```
We can add a tool parser to extract the tool calls from the generated
message to JSON:
```python
from langchain_core.output_parsers.openai_tools import JsonOutputToolsParser
tool_chain = llm_with_tools | JsonOutputToolsParser()
tool_chain.invoke("what's 3 * 12")
```
``` text
[{'type': 'Multiply', 'args': {'a': 3, 'b': 12}}]
```
Or back to the original Pydantic class:
```python
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
tool_chain = llm_with_tools | PydanticToolsParser(tools=[Multiply])
tool_chain.invoke("what's 3 * 12")
```
``` text
[Multiply(a=3, b=12)]
```
We can force tool usage by specifying `tool_choice="any"`:
```python
llm_with_tools = llm.bind_tools([Multiply], tool_choice="any")
llm_with_tools.invoke("I don't even want you to use the tool")
```
``` text
AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'null', 'type': <ToolType.function: 'function'>, 'function': {'name': 'Multiply', 'arguments': '{"a": 5, "b": 7}'}}]})
```
For more see the [ChatMistralAI API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html#langchain_mistralai.chat_models.ChatMistralAI).
</TabItem>
<TabItem value="together" label="Together">
Since TogetherAI is a drop-in replacement for OpenAI, we can just use
the OpenAI integration.
Install dependencies and set API keys:
```python
%pip install -qU langchain-openai
```
```python
os.environ["TOGETHER_API_KEY"] = getpass.getpass()
```
We can use the `ChatOpenAI.bind_tools()` method to handle converting
`Multiply` to a valid function schema and binding it to the model (i.e.,
passing it in each time the model is invoked).
```python
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(
base_url="https://api.together.xyz/v1",
api_key=os.environ["TOGETHER_API_KEY"],
model="mistralai/Mixtral-8x7B-Instruct-v0.1",
)
llm_with_tools = llm.bind_tools([Multiply])
llm_with_tools.invoke("what's 3 * 12")
```
``` text
AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_4tc61dp0478zafqe33hfriee', 'function': {'arguments': '{"a":3,"b":12}', 'name': 'Multiply'}, 'type': 'function'}]})
```
We can add a tool parser to extract the tool calls from the generated
message to JSON:
```python
from langchain_core.output_parsers.openai_tools import JsonOutputToolsParser
tool_chain = llm_with_tools | JsonOutputToolsParser()
tool_chain.invoke("what's 3 * 12")
```
``` text
[{'type': 'Multiply', 'args': {'a': 3, 'b': 12}}]
```
Or back to the original Pydantic class:
```python
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
tool_chain = llm_with_tools | PydanticToolsParser(tools=[Multiply])
tool_chain.invoke("what's 3 * 12")
```
``` text
[Multiply(a=3, b=12)]
```
If we wanted to force that a tool is used (and that it is used only
once), we can set the `tool_choice` argument to the name of the tool:
once), we can set the `tool_choice` argument:
```python
llm_with_multiply = llm.bind_tools([Multiply], tool_choice="Multiply")
@@ -136,13 +359,16 @@ llm_with_multiply.invoke(
)
```
```text
AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_f3DApOzb60iYjTfOhVFhDRMI', 'function': {'arguments': '{"a":5,"b":10}', 'name': 'Multiply'}, 'type': 'function'}]})
``` text
AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_6k6d0gr3jhqil2kqf7sgeusl', 'function': {'arguments': '{"a":5,"b":7}', 'name': 'Multiply'}, 'type': 'function'}]})
```
For more see the [ChatOpenAI API
reference](https://api.python.langchain.com/en/latest/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html#langchain_openai.chat_models.base.ChatOpenAI.bind_tools).
</TabItem>
</Tabs>
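Note that parsing only extracts the arguments; nothing above has actually computed 3 * 12. A minimal, provider-agnostic sketch of closing the loop by evaluating the parsed calls ourselves, reusing the `Multiply` schema and an `llm_with_tools` from any of the tabs above:

```python
from langchain_core.output_parsers.openai_tools import PydanticToolsParser

tool_chain = llm_with_tools | PydanticToolsParser(tools=[Multiply])
calls = tool_chain.invoke("what's 3 * 12")

# Each parsed call is a `Multiply` instance; evaluate it by hand.
for call in calls:
    print(call.a * call.b)  # -> 36
```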
## Defining functions schemas
In case you need to access function schemas directly, LangChain has a built-in converter that can turn
@@ -169,7 +395,7 @@ def multiply(a: int, b: int) -> int:
print(json.dumps(convert_to_openai_tool(multiply), indent=2))
```
```text
``` text
{
"type": "function",
"function": {
@@ -212,7 +438,7 @@ class multiply(BaseModel):
print(json.dumps(convert_to_openai_tool(multiply), indent=2))
```
```text
``` text
{
"type": "function",
"function": {
@@ -267,7 +493,7 @@ class Multiply(BaseTool):
print(json.dumps(convert_to_openai_tool(Multiply()), indent=2))
```
```text
``` text
{
"type": "function",
"function": {
@@ -296,14 +522,14 @@ print(json.dumps(convert_to_openai_tool(Multiply()), indent=2))
## Next steps
- **Output parsing**: See [OpenAI Tools output
parsers](../../../../docs/modules/model_io/output_parsers/types/openai_tools)
and [OpenAI Functions output
parsers](../../../../docs/modules/model_io/output_parsers/types/openai_functions)
to learn about extracting the function calling API responses into
various formats.
- **Structured output chains**: [Some models have constructors](../../../../docs/guides/structured_output) that
handle creating a structured output chain for you.
- **Tool use**: See how to construct chains and agents that actually
call the invoked tools in [these
guides](../../../../docs/use_cases/tool_use/).
- **Output parsing**: See [OpenAI Tools output
parsers](../../../../docs/modules/model_io/output_parsers/types/openai_tools)
and [OpenAI Functions output
parsers](../../../../docs/modules/model_io/output_parsers/types/openai_functions)
to learn about extracting the function calling API responses into
various formats.
- **Structured output chains**: [Some models have constructors](../../../../docs/guides/structured_output) that
handle creating a structured output chain for you.
- **Tool use**: See how to construct chains and agents that actually
call the invoked tools in [these
guides](../../../../docs/use_cases/tool_use/).

View File

@@ -22,19 +22,32 @@
"While chat models use language models under the hood, the interface they use is a bit different.\n",
"Rather than using a \"text in, text out\" API, they use an interface where \"chat messages\" are the inputs and outputs.\n",
"\n",
"## Setup\n"
"## Setup\n",
"\n",
"For this example we'll need to install the OpenAI partner package:\n",
"\n",
"```bash\n",
"pip install langchain-openai\n",
"```\n",
"\n",
"Accessing the API requires an API key, which you can get by creating an account and heading [here](https://platform.openai.com/account/api-keys). Once we have a key we'll want to set it as an environment variable by running:\n",
"\n",
"```bash\n",
"export OPENAI_API_KEY=\"...\"\n",
"```\n",
"If you'd prefer not to set an environment variable you can pass the key in directly via the `openai_api_key` named parameter when initiating the OpenAI LLM class:\n"
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"id": "e230abb2-bc84-438b-b9ff-dd124acb1375",
"metadata": {},
"outputs": [],
"source": [
"```{=mdx}\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"<ChatModelTabs customVarName=\"chat\" />\n",
"```"
"chat = ChatOpenAI(openai_api_key=\"...\")"
]
},
{
@@ -42,25 +55,19 @@
"id": "609bbd5c-e5a1-4166-89e1-d6c52054860d",
"metadata": {},
"source": [
"If you'd prefer not to set an environment variable you can pass the key in directly via the api key arg named parameter when initiating the chat model class:"
"Otherwise you can initialize without any params:"
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 1,
"id": "3d9dbf70-2397-4d6b-87ec-3e6d4699f3df",
"metadata": {},
"outputs": [],
"source": [
"```{=mdx}\n",
"<ChatModelTabs\n",
" openaiParams={`model=\"gpt-3.5-turbo-0125\", openai_api_key=\"...\"`}\n",
" anthropicParams={`model=\"claude-3-sonnet-20240229\", anthropic_api_key=\"...\"`}\n",
" fireworksParams={`model=\"accounts/fireworks/models/mixtral-8x7b-instruct\", fireworks_api_key=\"...\"`}\n",
" mistralParams={`model=\"mistral-large-latest\", mistral_api_key=\"...\"`}\n",
" googleParams={`model=\"gemini-pro\", google_api_key=\"...\"`}\n",
" togetherParams={`, together_api_key=\"...\"`}\n",
" customVarName=\"chat\"\n",
"/>\n",
"```"
"from langchain_openai import ChatOpenAI\n",
"\n",
"chat = ChatOpenAI()"
]
},
{
@@ -101,21 +108,6 @@
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "570dae71",
"metadata": {},
"outputs": [],
"source": [
"# | output: false\n",
"# | echo: false\n",
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"chat = ChatOpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 11,

View File

@@ -19,7 +19,7 @@
"\n",
"In many Q&A applications we want to allow the user to have a back-and-forth conversation, meaning the application needs some sort of \"memory\" of past questions and answers, and some logic for incorporating those into its current thinking.\n",
"\n",
"In this guide we focus on **adding logic for incorporating historical messages.** Further details on chat history management is [covered here](/docs/expression_language/how_to/message_history).\n",
"In this guide we focus on **adding logic for incorporating historical messages, and NOT on chat history management.** Chat history management is [covered here](/docs/expression_language/how_to/message_history).\n",
"\n",
"We'll work off of the Q&A app we built over the [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng in the [Quickstart](/docs/use_cases/question_answering/quickstart). We'll need to update two things about our existing app:\n",
"\n",
@@ -90,7 +90,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "07411adb-3722-4f65-ab7f-8f6f57663d11",
"metadata": {},
"outputs": [],
@@ -111,7 +111,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"id": "d8a913b1-0eea-442a-8a64-ec73333f104b",
"metadata": {},
"outputs": [],
@@ -128,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"id": "820244ae-74b4-4593-b392-822979dd91b8",
"metadata": {},
"outputs": [],
@@ -168,17 +168,17 @@
},
{
"cell_type": "code",
"execution_count": 4,
"id": "22206dfd-d673-4fa4-887f-349d273cb3f2",
"execution_count": 6,
"id": "0d3b0f36-7b56-49c0-8e40-a1aa9ebcbf24",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach helps agents to plan and execute tasks more efficiently by dividing them into manageable subgoals. Task decomposition can be achieved through various methods, including using prompting techniques, task-specific instructions, or human inputs.'"
"'Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It can be done through prompting techniques like Chain of Thought or Tree of Thoughts, or by using task-specific instructions or human inputs. Task decomposition helps agents plan ahead and manage complicated tasks more effectively.'"
]
},
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -196,21 +196,16 @@
"\n",
"First we'll need to define a sub-chain that takes historical messages and the latest user question, and reformulates the question if it makes reference to any information in the historical information.\n",
"\n",
"We'll use a prompt that includes a `MessagesPlaceholder` variable under the name \"chat_history\". This allows us to pass in a list of Messages to the prompt using the \"chat_history\" input key, and these messages will be inserted after the system message and before the human message containing the latest question.\n",
"\n",
"Note that we leverage a helper function [create_history_aware_retriever](https://api.python.langchain.com/en/latest/chains/langchain.chains.history_aware_retriever.create_history_aware_retriever.html) for this step, which manages the case where `chat_history` is empty, and otherwise applies `prompt | llm | StrOutputParser() | retriever` in sequence.\n",
"\n",
"`create_history_aware_retriever` constructs a chain that accepts keys `input` and `chat_history` as input, and has the same output schema as a retriever."
"We'll use a prompt that includes a `MessagesPlaceholder` variable under the name \"chat_history\". This allows us to pass in a list of Messages to the prompt using the \"chat_history\" input key, and these messages will be inserted after the system message and before the human message containing the latest question."
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 28,
"id": "2b685428-8b82-4af1-be4f-7232c5d55b73",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import create_history_aware_retriever\n",
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"\n",
"contextualize_q_system_prompt = \"\"\"Given a chat history and the latest user question \\\n",
@@ -220,13 +215,11 @@
"contextualize_q_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", contextualize_q_system_prompt),\n",
" MessagesPlaceholder(\"chat_history\"),\n",
" (\"human\", \"{input}\"),\n",
" MessagesPlaceholder(variable_name=\"chat_history\"),\n",
" (\"human\", \"{question}\"),\n",
" ]\n",
")\n",
"history_aware_retriever = create_history_aware_retriever(\n",
" llm, retriever, contextualize_q_prompt\n",
")"
"contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()"
]
},
{
@@ -234,7 +227,38 @@
"id": "23cbd8d7-7162-4fb0-9e69-67ea4d4603a5",
"metadata": {},
"source": [
"This chain prepends a rephrasing of the input query to our retriever, so that the retrieval incorporates the context of the conversation."
"Using this chain we can ask follow-up questions that reference past messages and have them reformulated into standalone questions:"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "46ee9aa1-16f1-4509-8dae-f8c71f4ad47d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'What is the definition of \"large\" in the context of a language model?'"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.messages import AIMessage, HumanMessage\n",
"\n",
"contextualize_q_chain.invoke(\n",
" {\n",
" \"chat_history\": [\n",
" HumanMessage(content=\"What does LLM stand for?\"),\n",
" AIMessage(content=\"Large language model\"),\n",
" ],\n",
" \"question\": \"What is meant by large\",\n",
" }\n",
")"
]
},
{
@@ -246,21 +270,16 @@
"\n",
"And now we can build our full QA chain. \n",
"\n",
"Here we use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer.\n",
"\n",
"We build our final `rag_chain` with [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html). This chain applies the `history_aware_retriever` and `question_answer_chain` in sequence, retaining intermediate outputs such as the retrieved context for convenience. It has input keys `input` and `chat_history`, and includes `input`, `chat_history`, `context`, and `answer` in its output."
"Notice we add some routing functionality to only run the \"condense question chain\" when our chat history isn't empty. Here we're taking advantage of the fact that if a function in an LCEL chain returns another chain, that chain will itself be invoked."
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 30,
"id": "66f275f3-ddef-4678-b90d-ee64576878f9",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import create_retrieval_chain\n",
"from langchain.chains.combine_documents import create_stuff_documents_chain\n",
"\n",
"qa_system_prompt = \"\"\"You are an assistant for question-answering tasks. \\\n",
"Use the following pieces of retrieved context to answer the question. \\\n",
"If you don't know the answer, just say that you don't know. \\\n",
@@ -270,44 +289,54 @@
"qa_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", qa_system_prompt),\n",
" MessagesPlaceholder(\"chat_history\"),\n",
" (\"human\", \"{input}\"),\n",
" MessagesPlaceholder(variable_name=\"chat_history\"),\n",
" (\"human\", \"{question}\"),\n",
" ]\n",
")\n",
"\n",
"\n",
"question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)\n",
"def contextualized_question(input: dict):\n",
" if input.get(\"chat_history\"):\n",
" return contextualize_q_chain\n",
" else:\n",
" return input[\"question\"]\n",
"\n",
"rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)"
"\n",
"rag_chain = (\n",
" RunnablePassthrough.assign(\n",
" context=contextualized_question | retriever | format_docs\n",
" )\n",
" | qa_prompt\n",
" | llm\n",
")"
]
},
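The routing trick in `contextualized_question` is worth isolating. A minimal, self-contained sketch of the underlying LCEL behavior, that a function returning another runnable causes that runnable to be invoked (toy chains, purely illustrative):

```python
from langchain_core.runnables import RunnableLambda

upper = RunnableLambda(lambda x: x["question"].upper())


def route(inputs: dict):
    # Returning a runnable makes LCEL invoke it with the same inputs.
    if inputs.get("chat_history"):
        return upper
    return inputs["question"]


chain = RunnableLambda(route)
print(chain.invoke({"question": "hi"}))  # -> hi
print(chain.invoke({"question": "hi", "chat_history": ["..."]}))  # -> HI
```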
{
"cell_type": "code",
"execution_count": 7,
"id": "0005810b-1b95-4666-a795-08d80e478b83",
"execution_count": 31,
"id": "51fd0e54-5bb4-4a9a-b012-87a18ebe2bef",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Task decomposition can be done in several common ways, including using Language Model (LLM) with simple prompting like \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\", providing task-specific instructions tailored to the specific task at hand, or incorporating human inputs to guide the decomposition process. These methods help in breaking down complex tasks into smaller, more manageable subtasks for efficient execution.\n"
]
"data": {
"text/plain": [
"AIMessage(content='Common ways of task decomposition include:\\n\\n1. Using Chain of Thought (CoT): CoT is a prompting technique that instructs the model to \"think step by step\" and decompose complex tasks into smaller and simpler steps. This approach utilizes more computation at test-time and sheds light on the model\\'s thinking process.\\n\\n2. Prompting with LLM: Language Model (LLM) can be used to prompt the model with simple instructions like \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\" This method guides the model to break down the task into manageable steps.\\n\\n3. Task-specific instructions: For certain tasks, task-specific instructions can be provided to guide the model in decomposing the task. For example, for writing a novel, the instruction \"Write a story outline\" can be given to help the model break down the task into smaller components.\\n\\n4. Human inputs: In some cases, human inputs can be used to assist in task decomposition. Humans can provide insights, expertise, and domain knowledge to help break down complex tasks into smaller subtasks.\\n\\nThese approaches aim to simplify complex tasks and enable more effective problem-solving and planning.')"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.messages import HumanMessage\n",
"\n",
"chat_history = []\n",
"\n",
"question = \"What is Task Decomposition?\"\n",
"ai_msg_1 = rag_chain.invoke({\"input\": question, \"chat_history\": chat_history})\n",
"chat_history.extend([HumanMessage(content=question), ai_msg_1[\"answer\"]])\n",
"ai_msg = rag_chain.invoke({\"question\": question, \"chat_history\": chat_history})\n",
"chat_history.extend([HumanMessage(content=question), ai_msg])\n",
"\n",
"second_question = \"What are common ways of doing it?\"\n",
"ai_msg_2 = rag_chain.invoke({\"input\": second_question, \"chat_history\": chat_history})\n",
"\n",
"print(ai_msg_2[\"answer\"])"
"rag_chain.invoke({\"question\": second_question, \"chat_history\": chat_history})"
]
},
{
@@ -315,29 +344,18 @@
"id": "53263a65-4de2-4dd8-9291-6a8169ab6f1d",
"metadata": {},
"source": [
":::{.callout-tip}\n",
":::tip\n",
"\n",
"Check out the [LangSmith trace](https://smith.langchain.com/public/243301e4-4cc5-4e52-a6e7-8cfe9208398d/r) \n",
"Check out the [LangSmith trace](https://smith.langchain.com/public/b3001782-bb30-476a-886b-12da17ec258f/r) \n",
"\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "0ab1ded4-76d9-453f-9b9b-db9a4560c737",
"id": "fdf6c7e0-84f8-4747-b2ae-e84315152bd9",
"metadata": {},
"source": [
"## Tying it together"
]
},
{
"cell_type": "markdown",
"id": "8a08a5ea-df5b-4547-93c6-2a3940dd5c3e",
"metadata": {},
"source": [
"\n",
"![](../../../static/img/conversational_retrieval_chain.png)\n",
"\n",
"Here we've gone over how to add application logic for incorporating historical outputs, but we're still manually updating the chat history and inserting it into each input. In a real Q&A application we'll want some way of persisting chat history and some way of automatically inserting and updating it.\n",
"\n",
"For this we can use:\n",
@@ -345,166 +363,23 @@
"- [BaseChatMessageHistory](/docs/modules/memory/chat_messages/): Store chat history.\n",
"- [RunnableWithMessageHistory](/docs/expression_language/how_to/message_history): Wrapper for an LCEL chain and a `BaseChatMessageHistory` that handles injecting chat history into inputs and updating it after each invocation.\n",
"\n",
"For a detailed walkthrough of how to use these classes together to create a stateful conversational chain, head to the [How to add message history (memory)](/docs/expression_language/how_to/message_history) LCEL page.\n",
"\n",
"Below, we implement a simple example of the second option, in which chat histories are stored in a simple dict.\n",
"\n",
"For convenience, we tie together all of the necessary steps in a single code cell:"
"For a detailed walkthrough of how to use these classes together to create a stateful conversational chain, head to the [How to add message history (memory)](/docs/expression_language/how_to/message_history) LCEL page."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "71c32048-1a41-465f-a9e2-c4affc332fd9",
"execution_count": null,
"id": "1f67a60a-0a31-4315-9cce-19c78d658f6a",
"metadata": {},
"outputs": [],
"source": [
"import bs4\n",
"from langchain import hub\n",
"from langchain.chains import create_history_aware_retriever, create_retrieval_chain\n",
"from langchain.chains.combine_documents import create_stuff_documents_chain\n",
"from langchain_community.chat_message_histories import ChatMessageHistory\n",
"from langchain_community.document_loaders import WebBaseLoader\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_core.chat_history import BaseChatMessageHistory\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
"\n",
"\n",
"### Construct retriever ###\n",
"loader = WebBaseLoader(\n",
" web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n",
" bs_kwargs=dict(\n",
" parse_only=bs4.SoupStrainer(\n",
" class_=(\"post-content\", \"post-title\", \"post-header\")\n",
" )\n",
" ),\n",
")\n",
"docs = loader.load()\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
"splits = text_splitter.split_documents(docs)\n",
"vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
"\n",
"### Contextualize question ###\n",
"contextualize_q_system_prompt = \"\"\"Given a chat history and the latest user question \\\n",
"which might reference context in the chat history, formulate a standalone question \\\n",
"which can be understood without the chat history. Do NOT answer the question, \\\n",
"just reformulate it if needed and otherwise return it as is.\"\"\"\n",
"contextualize_q_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", contextualize_q_system_prompt),\n",
" MessagesPlaceholder(\"chat_history\"),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
")\n",
"history_aware_retriever = create_history_aware_retriever(\n",
" llm, retriever, contextualize_q_prompt\n",
")\n",
"\n",
"\n",
"### Answer question ###\n",
"qa_system_prompt = \"\"\"You are an assistant for question-answering tasks. \\\n",
"Use the following pieces of retrieved context to answer the question. \\\n",
"If you don't know the answer, just say that you don't know. \\\n",
"Use three sentences maximum and keep the answer concise.\\\n",
"\n",
"{context}\"\"\"\n",
"qa_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", qa_system_prompt),\n",
" MessagesPlaceholder(\"chat_history\"),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
")\n",
"question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)\n",
"\n",
"rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)\n",
"\n",
"\n",
"### Statefully manage chat history ###\n",
"store = {}\n",
"\n",
"\n",
"def get_session_history(session_id: str) -> BaseChatMessageHistory:\n",
" if session_id not in store:\n",
" store[session_id] = ChatMessageHistory()\n",
" return store[session_id]\n",
"\n",
"\n",
"conversational_rag_chain = RunnableWithMessageHistory(\n",
" rag_chain,\n",
" get_session_history,\n",
" input_messages_key=\"input\",\n",
" history_messages_key=\"chat_history\",\n",
" output_messages_key=\"answer\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6d0a7a73-d151-47d9-9e99-b4f3291c0322",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach helps agents or models handle difficult tasks by dividing them into more manageable subtasks. It can be achieved through methods like Chain of Thought (CoT) or Tree of Thoughts, which guide the model in thinking step by step or exploring multiple reasoning possibilities at each step.'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"conversational_rag_chain.invoke(\n",
" {\"input\": \"What is Task Decomposition?\"},\n",
" config={\n",
" \"configurable\": {\"session_id\": \"abc123\"}\n",
" }, # constructs a key \"abc123\" in `store`.\n",
")[\"answer\"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "17021822-896a-4513-a17d-1d20b1c5381c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Task decomposition can be done in common ways such as using Language Model (LLM) with simple prompting, task-specific instructions, or human inputs. For example, LLM can be guided with prompts like \"Steps for XYZ\" to break down tasks, or specific instructions like \"Write a story outline\" can be given for task decomposition. Additionally, human inputs can also be utilized to decompose tasks into smaller, more manageable steps.'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"conversational_rag_chain.invoke(\n",
" {\"input\": \"What are common ways of doing it?\"},\n",
" config={\"configurable\": {\"session_id\": \"abc123\"}},\n",
")[\"answer\"]"
]
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "poetry-venv",
"language": "python",
"name": "python3"
"name": "poetry-venv"
},
"language_info": {
"codemirror_mode": {
@@ -516,7 +391,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -24,7 +24,7 @@ introduction](../../../docs/use_cases/question_answering/), which has
two main components:
**Indexing**: a pipeline for ingesting data from a source and indexing
it. _This usually happens offline._
it. *This usually happens offline.*
**Retrieval and generation**: the actual RAG chain, which takes the user
query at run time and retrieves the relevant data from the index, then
@@ -77,7 +77,7 @@ We'll use the following packages:
%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai chromadb bs4
```
We need to set environment variable `OPENAI_API_KEY` for the embeddings model, which can be done
We need to set environment variable `OPENAI_API_KEY`, which can be done
directly or loaded from a `.env` file like so:
```python
@@ -125,13 +125,10 @@ from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
```
import ChatModelTabs from "@theme/ChatModelTabs";
<ChatModelTabs customVarName="llm" />
```python
# Load, chunk and index the contents of the blog.
@@ -152,6 +149,8 @@ vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings
# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
@@ -165,11 +164,12 @@ rag_chain = (
)
```
```python
rag_chain.invoke("What is Task Decomposition?")
```
```text
``` text
'Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It can be done through prompting techniques like Chain of Thought or Tree of Thoughts, or by using task-specific instructions or human inputs. Task decomposition helps agents plan ahead and manage complicated tasks more effectively.'
```
@@ -219,11 +219,12 @@ loader = WebBaseLoader(
docs = loader.load()
```
```python
len(docs[0].page_content)
```
```text
``` text
42824
```
@@ -231,11 +232,11 @@ len(docs[0].page_content)
print(docs[0].page_content[:500])
```
```text
``` text
LLM Powered Autonomous Agents
Date: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng
@@ -247,14 +248,13 @@ In
### Go deeper
`DocumentLoader`: Object that loads data from a source as list of
`Documents` (a minimal custom loader sketch follows this list).
- [Docs](../../../docs/modules/data_connection/document_loaders/):
Detailed documentation on how to use `DocumentLoaders`.
- [Integrations](../../../docs/integrations/document_loaders/): 160+
integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/document_loaders/langchain_core.document_loaders.base.BaseLoader.html):
API reference for the base interface.
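If none of the built-in loaders fit, the `BaseLoader` interface above is small enough to implement directly. A minimal sketch, assuming a plain-text source (the class name and file format are illustrative, not from these docs):

```python
from typing import Iterator, List

from langchain_community.document_loaders.base import BaseLoader
from langchain_core.documents import Document


class PlainTextLineLoader(BaseLoader):
    """Toy loader: yields one Document per non-empty line of a text file."""

    def __init__(self, file_path: str) -> None:
        self.file_path = file_path

    def lazy_load(self) -> Iterator[Document]:
        with open(self.file_path, encoding="utf-8") as f:
            for lineno, line in enumerate(f, start=1):
                if line.strip():
                    yield Document(
                        page_content=line.rstrip("\n"),
                        metadata={"source": self.file_path, "line": lineno},
                    )

    def load(self) -> List[Document]:  # eager variant built on the lazy one
        return list(self.lazy_load())
```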
## 2. Indexing: Split {#indexing-split}
@@ -289,11 +289,12 @@ text_splitter = RecursiveCharacterTextSplitter(
all_splits = text_splitter.split_documents(docs)
```
```python
len(all_splits)
```
```text
``` text
66
```
@@ -301,7 +302,7 @@ len(all_splits)
len(all_splits[0].page_content)
```
```text
``` text
969
```
@@ -309,7 +310,7 @@ len(all_splits[0].page_content)
all_splits[10].metadata
```
```text
``` text
{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
'start_index': 7056}
```
@@ -317,20 +318,18 @@ all_splits[10].metadata
### Go deeper
`TextSplitter`: Object that splits a list of `Document`s into smaller
chunks. Subclass of `DocumentTransformer`s.
- Explore `Context-aware splitters`, which keep the location ("context") of each
  split in the original `Document` (a Markdown sketch follows this section):
  - [Markdown files](../../../docs/modules/data_connection/document_transformers/markdown_header_metadata)
  - [Code (py or js)](../../../docs/integrations/document_loaders/source_code)
  - [Scientific papers](../../../docs/integrations/document_loaders/grobid)
- [Interface](https://api.python.langchain.com/en/latest/base/langchain_text_splitters.base.TextSplitter.html): API reference for the base interface.
`DocumentTransformer`: Object that performs a transformation on a list
of `Document`s.
- [Docs](../../../docs/modules/data_connection/document_transformers/): Detailed documentation on how to use `DocumentTransformers`
- [Integrations](../../../docs/integrations/document_transformers/)
- [Interface](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.transformers.BaseDocumentTransformer.html): API reference for the base interface.
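To make the context-aware option concrete, here is a sketch of header-based Markdown splitting (the header tuples and sample text are illustrative):

```python
from langchain_text_splitters import MarkdownHeaderTextSplitter

splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=[("#", "Header 1"), ("##", "Header 2")]
)

md = "# Agents\n\nIntro text.\n\n## Planning\n\nTask decomposition details."
splits = splitter.split_text(md)
# Each split is a Document whose metadata records the headers it falls under,
# e.g. {"Header 1": "Agents", "Header 2": "Planning"}.
```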
## 3. Indexing: Store {#indexing-store}
@@ -361,17 +360,15 @@ vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbedd
### Go deeper
`Embeddings`: Wrapper around a text embedding model, used for converting
text to embeddings.
- [Docs](../../../docs/modules/data_connection/text_embedding): Detailed documentation on how to use embeddings.
- [Integrations](../../../docs/integrations/text_embedding/): 30+ integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/embeddings/langchain_core.embeddings.Embeddings.html): API reference for the base interface.
`VectorStore`: Wrapper around a vector database, used for storing and
querying embeddings (a direct query sketch follows this list).
- [Docs](../../../docs/modules/data_connection/vectorstores/): Detailed documentation on how to use vector stores.
- [Integrations](../../../docs/integrations/vectorstores/): 40+ integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html): API reference for the base interface.
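As a quick sanity check, the store can also be queried directly before wrapping it in a retriever or chain (a sketch reusing the `vectorstore` built above):

```python
# Embeds the query with the same embedding model and returns the k nearest chunks.
docs = vectorstore.similarity_search("What is Task Decomposition?", k=4)
print(len(docs), docs[0].metadata)
```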
This completes the **Indexing** portion of the pipeline. At this point
@@ -402,15 +399,17 @@ facilitate retrieval. Any `VectorStore` can easily be turned into a
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
```
```python
retrieved_docs = retriever.invoke("What are the approaches to Task Decomposition?")
```
```python
len(retrieved_docs)
```
```text
``` text
6
```
@@ -418,7 +417,7 @@ len(retrieved_docs)
print(retrieved_docs[0].page_content)
```
```text
``` text
Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.
```
@@ -430,27 +429,27 @@ to do retrieval, too.
`Retriever`: An object that returns `Document`s given a text query
- [Docs](../../../docs/modules/data_connection/retrievers/): Further
documentation on the interface and built-in retrieval techniques.
Some of which include:
- `MultiQueryRetriever` [generates variants of the input
question](../../../docs/modules/data_connection/retrievers/MultiQueryRetriever)
to improve retrieval hit rate.
- `MultiVectorRetriever` (diagram below) instead generates
[variants of the
embeddings](../../../docs/modules/data_connection/retrievers/multi_vector),
also in order to improve retrieval hit rate.
- `Max marginal relevance` selects for [relevance and
diversity](https://www.cs.cmu.edu/~jgc/publication/The_Use_MMR_Diversity_Based_LTMIR_1998.pdf)
among the retrieved documents to avoid passing in duplicate
context (see the sketch after this list).
- Documents can be filtered during vector store retrieval using
metadata filters, such as with a [Self Query
Retriever](../../../docs/modules/data_connection/retrievers/self_query).
- [Integrations](../../../docs/integrations/retrievers/): Integrations
with retrieval services.
- [Interface](https://api.python.langchain.com/en/latest/retrievers/langchain_core.retrievers.BaseRetriever.html):
API reference for the base interface.
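A sketch of the max-marginal-relevance option referenced in the list (parameter values are illustrative):

```python
# MMR re-ranks a larger candidate pool for diversity: fetch 20 similar chunks,
# then keep the 6 that are relevant but least redundant with each other.
mmr_retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 6, "fetch_k": 20},
)
mmr_docs = mmr_retriever.invoke("What are the approaches to Task Decomposition?")
```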
## 5. Retrieval and Generation: Generate {#retrieval-and-generation-generate}
@@ -461,13 +460,34 @@ parses the output.
We'll use the gpt-3.5-turbo OpenAI chat model, but any LangChain `LLM`
or `ChatModel` could be substituted in.
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
<ChatModelTabs
customVarName="llm"
anthropicParams={`"model="claude-3-sonnet-20240229", temperature=0.2, max_tokens=1024"`}
/>
<Tabs>
<TabItem value="openai" label="OpenAI" default>
```python
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature=0)
```
</TabItem>
<TabItem value="local" label="Anthropic">
```python
%pip install -qU langchain-anthropic
```
```python
from langchain_anthropic import ChatAnthropic
llm = ChatAnthropic(model="claude-3-sonnet-20240229", temperature=0.2, max_tokens=1024)
```
</TabItem>
</Tabs>
We'll use a prompt for RAG that is checked into the LangChain prompt hub
([here](https://smith.langchain.com/hub/rlm/rag-prompt)).
@@ -478,6 +498,7 @@ from langchain import hub
prompt = hub.pull("rlm/rag-prompt")
```
```python
example_messages = prompt.invoke(
{"context": "filler context", "question": "filler question"}
@@ -485,7 +506,7 @@ example_messages = prompt.invoke(
example_messages
```
```text
``` text
[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:")]
```
@@ -493,10 +514,10 @@ example_messages
print(example_messages[0].content)
```
```text
``` text
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: filler question
Context: filler context
Answer:
```
@@ -522,12 +543,13 @@ rag_chain = (
)
```
```python
for chunk in rag_chain.stream("What is Task Decomposition?"):
print(chunk, end="", flush=True)
```
```text
``` text
Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It involves transforming big tasks into multiple manageable tasks, allowing for easier interpretation and execution by autonomous agents or models. Task decomposition can be done through various methods, such as using prompting techniques, task-specific instructions, or human inputs.
```
@@ -539,16 +561,14 @@ trace](https://smith.langchain.com/public/1799e8db-8a6d-4eb2-84d5-46e8d7d5a99b/r
#### Choosing a model
`ChatModel`: An LLM-backed chat model. Takes in a sequence of messages
and returns a message.
- [Docs](../../../docs/modules/model_io/chat/)
- [Integrations](../../../docs/integrations/chat/): 25+ integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.chat_models.BaseChatModel.html): API reference for the base interface.
`LLM`: A text-in-text-out LLM. Takes in a string and returns a string.
- [Docs](../../../docs/modules/model_io/llms)
- [Integrations](../../../docs/integrations/llms): 75+ integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.llms.BaseLLM.html): API reference for the base interface.
See a guide on RAG with locally-running models
@@ -585,7 +605,7 @@ rag_chain = (
rag_chain.invoke("What is Task Decomposition?")
```
```text
``` text
'Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It involves transforming big tasks into multiple manageable tasks, allowing for a more systematic and organized approach to problem-solving. Thanks for asking!'
```
@@ -599,11 +619,11 @@ plenty of features, integrations, and extensions to explore in each of
the above sections. Along with the **Go deeper** sources mentioned
above, good next steps include:
- [Return
sources](../../../docs/use_cases/question_answering/sources): Learn
how to return source documents
- [Streaming](../../../docs/use_cases/question_answering/streaming):
Learn how to stream outputs and intermediate steps
- [Add chat
history](../../../docs/use_cases/question_answering/chat_history):
Learn how to add chat history to your app

View File

@@ -235,7 +235,7 @@
"id": "b437da5d-ca09-4d15-9be2-c35e5a1ace77",
"metadata": {},
"source": [
":::{.callout-tip}\n",
":::tip\n",
"\n",
"Check out the [LangSmith trace](https://smith.langchain.com/public/007d7e01-cb62-4a84-8b71-b24767f953ee/r)\n",
"\n",

View File

@@ -1,130 +0,0 @@
"""This script checks documentation for broken import statements."""
import importlib
import json
import logging
import os
import re
import warnings
from pathlib import Path
from typing import List, Tuple
logger = logging.getLogger(__name__)
DOCS_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs"
import_pattern = re.compile(
r"import\s+(\w+)|from\s+([\w\.]+)\s+import\s+((?:\w+(?:,\s*)?)+|\(.*?\))", re.DOTALL
)
def _get_imports_from_code_cell(code_lines: str) -> List[Tuple[str, str]]:
"""Get (module, import) statements from a single code cell."""
import_statements = []
for line in code_lines:
line = line.strip()
if line.startswith("#") or not line:
continue
# Join lines that end with a backslash
if line.endswith("\\"):
line = line[:-1].rstrip() + " "
continue
matches = import_pattern.findall(line)
for match in matches:
if match[0]: # simple import statement
import_statements.append((match[0], ""))
else: # from ___ import statement
module, items = match[1], match[2]
items_list = items.replace(" ", "").split(",")
for item in items_list:
import_statements.append((module, item))
return import_statements
def _extract_import_statements(notebook_path: str) -> List[Tuple[str, str]]:
"""Get (module, import) statements from a Jupyter notebook."""
with open(notebook_path, "r", encoding="utf-8") as file:
notebook = json.load(file)
code_cells = [cell for cell in notebook["cells"] if cell["cell_type"] == "code"]
import_statements = []
for cell in code_cells:
code_lines = cell["source"]
import_statements.extend(_get_imports_from_code_cell(code_lines))
return import_statements
def _get_bad_imports(import_statements: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
"""Collect offending import statements."""
offending_imports = []
for module, item in import_statements:
try:
if item:
try:
# submodule
full_module_name = f"{module}.{item}"
importlib.import_module(full_module_name)
except ModuleNotFoundError:
# attribute
try:
imported_module = importlib.import_module(module)
getattr(imported_module, item)
except AttributeError:
offending_imports.append((module, item))
except Exception:
offending_imports.append((module, item))
else:
importlib.import_module(module)
except Exception:
offending_imports.append((module, item))
return offending_imports
def _is_relevant_import(module: str) -> bool:
"""Check if module is recognized."""
# Ignore things like langchain_{bla}, where bla is unrecognized.
recognized_packages = [
"langchain",
"langchain_core",
"langchain_community",
"langchain_experimental",
"langchain_text_splitters",
]
return module.split(".")[0] in recognized_packages
def _serialize_bad_imports(bad_files: list) -> str:
"""Serialize bad imports to a string."""
bad_imports_str = ""
for file, bad_imports in bad_files:
bad_imports_str += f"File: {file}\n"
for module, item in bad_imports:
bad_imports_str += f" {module}.{item}\n"
return bad_imports_str
def check_notebooks(directory: str) -> list:
"""Check notebooks for broken import statements."""
bad_files = []
for root, _, files in os.walk(directory):
for file in files:
if file.endswith(".ipynb") and not file.endswith("-checkpoint.ipynb"):
notebook_path = os.path.join(root, file)
import_statements = [
(module, item)
for module, item in _extract_import_statements(notebook_path)
if _is_relevant_import(module)
]
bad_imports = _get_bad_imports(import_statements)
if bad_imports:
bad_files.append(
(
os.path.join(root, file),
bad_imports,
)
)
return bad_files
if __name__ == "__main__":
bad_files = check_notebooks(DOCS_DIR)
if bad_files:
raise ImportError("Found bad imports:\n" f"{_serialize_bad_imports(bad_files)}")
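For reference, a small demonstration of what `import_pattern` captures (the sample strings are illustrative, not from the repository):

```python
import re

import_pattern = re.compile(
    r"import\s+(\w+)|from\s+([\w\.]+)\s+import\s+((?:\w+(?:,\s*)?)+|\(.*?\))", re.DOTALL
)

# Plain import: the first group holds the module name.
print(import_pattern.findall("import json"))
# [('json', '', '')]

# from-import: the second group is the module, the third the imported names.
print(import_pattern.findall("from langchain_core.tools import BaseTool"))
# [('', 'langchain_core.tools', 'BaseTool')]

print(import_pattern.findall("from typing import List, Tuple"))
# [('', 'typing', 'List, Tuple')]
```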

View File

@@ -1,4 +1,4 @@
/* eslint-disable react/jsx-props-no-spreading, react/destructuring-assignment */
/* eslint-disable react/jsx-props-no-spreading */
import React from "react";
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
@@ -20,24 +20,7 @@ os.environ["${apiKeyName}"] = getpass.getpass()`;
}
/**
* @typedef {Object} ChatModelTabsProps - Component props.
* @property {string} [openaiParams] - Parameters for OpenAI chat model. Defaults to `model="gpt-3.5-turbo-0125"`
* @property {string} [anthropicParams] - Parameters for Anthropic chat model. Defaults to `model="claude-3-sonnet-20240229"`
* @property {string} [fireworksParams] - Parameters for Fireworks chat model. Defaults to `model="accounts/fireworks/models/mixtral-8x7b-instruct"`
* @property {string} [mistralParams] - Parameters for Mistral chat model. Defaults to `model="mistral-large-latest"`
* @property {string} [googleParams] - Parameters for Google chat model. Defaults to `model="gemini-pro"`
* @property {string} [togetherParams] - Parameters for Together chat model. Defaults to `model="mistralai/Mixtral-8x7B-Instruct-v0.1"` served via Together's OpenAI-compatible endpoint
* @property {boolean} [hideOpenai] - Whether or not to hide OpenAI chat model.
* @property {boolean} [hideAnthropic] - Whether or not to hide Anthropic chat model.
* @property {boolean} [hideFireworks] - Whether or not to hide Fireworks chat model.
* @property {boolean} [hideMistral] - Whether or not to hide Mistral chat model.
* @property {boolean} [hideGoogle] - Whether or not to hide Google chat model.
* @property {boolean} [hideTogether] - Whether or not to hide Together chat model.
* @property {string} [customVarName] - Custom variable name for the model. Defaults to `model`.
*/
/**
* @param {ChatModelTabsProps} props - Component props.
* @param {{ openaiParams?: string, anthropicParams?: string, fireworksParams?: string, mistralParams?: string, googleParams?: string, hideOpenai?: boolean, hideAnthropic?: boolean, hideFireworks?: boolean, hideMistral?: boolean, hideGoogle?: boolean }} props
*/
export default function ChatModelTabs(props) {
const {
@@ -46,36 +29,24 @@ export default function ChatModelTabs(props) {
fireworksParams,
mistralParams,
googleParams,
togetherParams,
hideOpenai,
hideAnthropic,
hideFireworks,
hideMistral,
hideGoogle,
hideTogether,
customVarName,
} = props;
const openAIParamsOrDefault = openaiParams ?? `model="gpt-3.5-turbo-0125"`;
const anthropicParamsOrDefault =
anthropicParams ?? `model="claude-3-sonnet-20240229"`;
const fireworksParamsOrDefault =
fireworksParams ??
`model="accounts/fireworks/models/mixtral-8x7b-instruct"`;
const mistralParamsOrDefault =
mistralParams ?? `model="mistral-large-latest"`;
const googleParamsOrDefault = googleParams ?? `model="gemini-pro"`;
const togetherParamsOrDefault =
togetherParams ??
`\n base_url="https://api.together.xyz/v1",\n api_key=os.environ["TOGETHER_API_KEY"],\n model="mistralai/Mixtral-8x7B-Instruct-v0.1",`;
const llmVarName = customVarName ?? "model";
const openAIParamsOrDefault = openaiParams ?? `model="gpt-3.5-turbo-0125"`
const anthropicParamsOrDefault = anthropicParams ?? `model="claude-3-sonnet-20240229"`
const fireworksParamsOrDefault = fireworksParams ?? `model="accounts/fireworks/models/mixtral-8x7b-instruct"`
const mistralParamsOrDefault = mistralParams ?? `model="mistral-large-latest"`
const googleParamsOrDefault = googleParams ?? `model="gemini-pro"`
const tabItems = [
{
value: "OpenAI",
label: "OpenAI",
text: `from langchain_openai import ChatOpenAI\n\n${llmVarName} = ChatOpenAI(${openAIParamsOrDefault})`,
text: `from langchain_openai import ChatOpenAI\n\nmodel = ChatOpenAI(${openAIParamsOrDefault})`,
apiKeyName: "OPENAI_API_KEY",
packageName: "langchain-openai",
default: true,
@@ -84,7 +55,7 @@ export default function ChatModelTabs(props) {
{
value: "Anthropic",
label: "Anthropic",
text: `from langchain_anthropic import ChatAnthropic\n\n${llmVarName} = ChatAnthropic(${anthropicParamsOrDefault})`,
text: `from langchain_anthropic import ChatAnthropic\n\nmodel = ChatAnthropic(${anthropicParamsOrDefault})`,
apiKeyName: "ANTHROPIC_API_KEY",
packageName: "langchain-anthropic",
default: false,
@@ -93,7 +64,7 @@ export default function ChatModelTabs(props) {
{
value: "FireworksAI",
label: "FireworksAI",
text: `from langchain_fireworks import ChatFireworks\n\n${llmVarName} = ChatFireworks(${fireworksParamsOrDefault})`,
text: `from langchain_fireworks import ChatFireworks\n\nmodel = ChatFireworks(${fireworksParamsOrDefault})`,
apiKeyName: "FIREWORKS_API_KEY",
packageName: "langchain-fireworks",
default: false,
@@ -102,7 +73,7 @@ export default function ChatModelTabs(props) {
{
value: "MistralAI",
label: "MistralAI",
text: `from langchain_mistralai import ChatMistralAI\n\n${llmVarName} = ChatMistralAI(${mistralParamsOrDefault})`,
text: `from langchain_mistralai import ChatMistralAI\n\nmodel = ChatMistralAI(${mistralParamsOrDefault})`,
apiKeyName: "MISTRAL_API_KEY",
packageName: "langchain-mistralai",
default: false,
@@ -111,40 +82,22 @@ export default function ChatModelTabs(props) {
{
value: "Google",
label: "Google",
text: `from langchain_google_genai import ChatGoogleGenerativeAI\n\n${llmVarName} = ChatGoogleGenerativeAI(${googleParamsOrDefault})`,
text: `from langchain_google_genai import ChatGoogleGenerativeAI\n\nmodel = ChatGoogleGenerativeAI(${googleParamsOrDefault})`,
apiKeyName: "GOOGLE_API_KEY",
packageName: "langchain-google-genai",
default: false,
shouldHide: hideGoogle,
},
{
value: "TogetherAI",
label: "TogetherAI",
text: `from langchain_openai import ChatOpenAI\n\n${llmVarName} = ChatOpenAI(${togetherParamsOrDefault})`,
apiKeyName: "TOGETHER_API_KEY",
packageName: "langchain-openai",
default: false,
shouldHide: hideTogether,
},
];
}
]
return (
<Tabs groupId="modelTabs">
{tabItems
.filter((tabItem) => !tabItem.shouldHide)
.map((tabItem) => (
<TabItem
value={tabItem.value}
label={tabItem.label}
default={tabItem.default}
>
<Setup
apiKeyName={tabItem.apiKeyName}
packageName={tabItem.packageName}
/>
<CodeBlock language="python">{tabItem.text}</CodeBlock>
</TabItem>
))}
{tabItems.filter((tabItem) => !tabItem.shouldHide).map((tabItem) => (
<TabItem value={tabItem.value} label={tabItem.label} default={tabItem.default}>
<Setup apiKeyName={tabItem.apiKeyName} packageName={tabItem.packageName} />
<CodeBlock language="python">{tabItem.text}</CodeBlock>
</TabItem>
))}
</Tabs>
);
}

Binary file not shown.


View File

@@ -92,10 +92,6 @@
"source": "/docs/integrations/llms/huggingface_hub",
"destination": "/docs/integrations/llms/huggingface_endpoint"
},
{
"source": "/docs/integrations/llms/bigdl",
"destination": "/docs/integrations/llms/ipex_llm"
},
{
"source": "/docs/integrations/llms/watsonxllm",
"destination": "/docs/integrations/llms/ibm_watsonx"

View File

@@ -4,9 +4,9 @@ yum -y update
yum install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip -y
# install quarto
wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.4.552/quarto-1.4.552-linux-amd64.tar.gz
tar -xzf quarto-1.4.552-linux-amd64.tar.gz
export PATH=$PATH:$(pwd)/quarto-1.4.552/bin/
wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.3.450/quarto-1.3.450-linux-amd64.tar.gz
tar -xzf quarto-1.3.450-linux-amd64.tar.gz
export PATH=$PATH:$(pwd)/quarto-1.3.450/bin/
# setup python env

View File

@@ -1,14 +1,12 @@
"""**Toolkits** are sets of tools that can be used to interact with
various services and APIs.
"""
import importlib
from typing import Any
_module_lookup = {
"AINetworkToolkit": "langchain_community.agent_toolkits.ainetwork.toolkit",
"AmadeusToolkit": "langchain_community.agent_toolkits.amadeus.toolkit",
"AzureAiServicesToolkit": "langchain_community.agent_toolkits.azure_ai_services",
"AzureCognitiveServicesToolkit": "langchain_community.agent_toolkits.azure_cognitive_services", # noqa: E501
"CogniswitchToolkit": "langchain_community.agent_toolkits.cogniswitch.toolkit",
"ConneryToolkit": "langchain_community.agent_toolkits.connery",

View File

@@ -1,31 +0,0 @@
from __future__ import annotations
from typing import List
from langchain_core.tools import BaseTool
from langchain_community.agent_toolkits.base import BaseToolkit
from langchain_community.tools.azure_ai_services import (
AzureAiServicesDocumentIntelligenceTool,
AzureAiServicesImageAnalysisTool,
AzureAiServicesSpeechToTextTool,
AzureAiServicesTextAnalyticsForHealthTool,
AzureAiServicesTextToSpeechTool,
)
class AzureAiServicesToolkit(BaseToolkit):
"""Toolkit for Azure AI Services."""
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""
tools: List[BaseTool] = [
AzureAiServicesDocumentIntelligenceTool(),
AzureAiServicesImageAnalysisTool(),
AzureAiServicesSpeechToTextTool(),
AzureAiServicesTextToSpeechTool(),
AzureAiServicesTextAnalyticsForHealthTool(),
]
return tools

View File

@@ -150,18 +150,17 @@ def create_sql_agent(
prompt = prompt.partial(top_k=str(top_k))
if "dialect" in prompt.input_variables:
prompt = prompt.partial(dialect=toolkit.dialect)
if any(key in prompt.input_variables for key in ["table_info", "table_names"]):
db_context = toolkit.get_context()
if "table_info" in prompt.input_variables:
prompt = prompt.partial(table_info=db_context["table_info"])
tools = [
tool for tool in tools if not isinstance(tool, InfoSQLDatabaseTool)
]
if "table_names" in prompt.input_variables:
prompt = prompt.partial(table_names=db_context["table_names"])
tools = [
tool for tool in tools if not isinstance(tool, ListSQLDatabaseTool)
]
db_context = toolkit.get_context()
if "table_info" in prompt.input_variables:
prompt = prompt.partial(table_info=db_context["table_info"])
tools = [
tool for tool in tools if not isinstance(tool, InfoSQLDatabaseTool)
]
if "table_names" in prompt.input_variables:
prompt = prompt.partial(table_names=db_context["table_names"])
tools = [
tool for tool in tools if not isinstance(tool, ListSQLDatabaseTool)
]
if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
if prompt is None:

View File

@@ -403,7 +403,7 @@ class _RedisCacheBase(BaseCache, ABC):
if results:
for _, text in results.items():
try:
generations.append(loads(cast(str, text)))
generations.append(loads(text))
except Exception:
logger.warning(
"Retrieving a cache value that could not be deserialized "

View File

@@ -43,8 +43,6 @@ class DynamoDBChatMessageHistory(BaseChatMessageHistory):
table. DynamoDB handles deletion of expired items without consuming
write throughput. To enable this feature on the table, follow the
[AWS DynamoDB documentation](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/time-to-live-ttl-how-to.html)
history_size: Maximum number of messages to store. If None then there is no
limit. If not None then only the latest `history_size` messages are stored.
"""
def __init__(
@@ -58,7 +56,6 @@ class DynamoDBChatMessageHistory(BaseChatMessageHistory):
kms_key_id: Optional[str] = None,
ttl: Optional[int] = None,
ttl_key_name: str = "expireAt",
history_size: Optional[int] = None,
):
if boto3_session:
client = boto3_session.resource("dynamodb", endpoint_url=endpoint_url)
@@ -78,7 +75,6 @@ class DynamoDBChatMessageHistory(BaseChatMessageHistory):
self.key: Dict = key or {primary_key_name: session_id}
self.ttl = ttl
self.ttl_key_name = ttl_key_name
self.history_size = history_size
if kms_key_id:
try:
@@ -107,7 +103,7 @@ class DynamoDBChatMessageHistory(BaseChatMessageHistory):
)
@property
def messages(self) -> List[BaseMessage]:
def messages(self) -> List[BaseMessage]: # type: ignore
"""Retrieve the messages from DynamoDB"""
try:
from botocore.exceptions import ClientError
@@ -133,13 +129,6 @@ class DynamoDBChatMessageHistory(BaseChatMessageHistory):
messages = messages_from_dict(items)
return messages
@messages.setter
def messages(self, messages: List[BaseMessage]) -> None:
raise NotImplementedError(
"Direct assignment to 'messages' is not allowed."
" Use the 'add_messages' instead."
)
def add_message(self, message: BaseMessage) -> None:
"""Append the message to the record in DynamoDB"""
try:
@@ -153,9 +142,6 @@ class DynamoDBChatMessageHistory(BaseChatMessageHistory):
_message = message_to_dict(message)
messages.append(_message)
if self.history_size:
messages = messages[-self.history_size :]
try:
if self.ttl:
import time
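A hedged usage sketch for the `history_size` behavior shown in this hunk (the table name, session id, and credentials are placeholders):

```python
from langchain_community.chat_message_histories import DynamoDBChatMessageHistory

# Assumes a DynamoDB table "SessionTable" with a SessionId partition key
# and AWS credentials available in the environment.
history = DynamoDBChatMessageHistory(
    table_name="SessionTable",
    session_id="user-42",
    history_size=10,  # only the 10 most recent messages are retained
)
history.add_user_message("hi")
history.add_ai_message("hello!")
print(history.messages)
```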

View File

@@ -143,7 +143,7 @@ class ElasticsearchChatMessageHistory(BaseChatMessageHistory):
return es_client
@property
def messages(self) -> List[BaseMessage]:
def messages(self) -> List[BaseMessage]: # type: ignore[override]
"""Retrieve the messages from Elasticsearch"""
try:
from elasticsearch import ApiError
@@ -167,13 +167,6 @@ class ElasticsearchChatMessageHistory(BaseChatMessageHistory):
return messages_from_dict(items)
@messages.setter
def messages(self, messages: List[BaseMessage]) -> None:
raise NotImplementedError(
"Direct assignment to 'messages' is not allowed."
" Use the 'add_messages' instead."
)
def add_message(self, message: BaseMessage) -> None:
"""Add a message to the chat session in Elasticsearch"""
try:

View File

@@ -1,37 +1,16 @@
import json
import warnings
from typing import (
Any,
AsyncIterator,
Dict,
Iterator,
List,
Mapping,
Optional,
Type,
cast,
)
from typing import Any, Dict, List, Optional, cast
from langchain_core.callbacks import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
)
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
AIMessage,
AIMessageChunk,
BaseMessage,
BaseMessageChunk,
ChatMessage,
ChatMessageChunk,
FunctionMessageChunk,
HumanMessage,
HumanMessageChunk,
SystemMessage,
SystemMessageChunk,
ToolMessageChunk,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.outputs import ChatGeneration, ChatResult
from langchain_community.llms.azureml_endpoint import (
AzureMLBaseEndpoint,
@@ -46,12 +25,12 @@ class LlamaContentFormatter(ContentFormatterBase):
def __init__(self) -> None:
raise TypeError(
"`LlamaContentFormatter` is deprecated for chat models. Use "
"`CustomOpenAIContentFormatter` instead."
"`LlamaChatContentFormatter` instead."
)
class CustomOpenAIChatContentFormatter(ContentFormatterBase):
"""Chat Content formatter for models with OpenAI like API scheme."""
class LlamaChatContentFormatter(ContentFormatterBase):
"""Content formatter for `LLaMA`."""
SUPPORTED_ROLES: List[str] = ["user", "assistant", "system"]
@@ -76,7 +55,7 @@ class CustomOpenAIChatContentFormatter(ContentFormatterBase):
}
elif (
isinstance(message, ChatMessage)
and message.role in CustomOpenAIChatContentFormatter.SUPPORTED_ROLES
and message.role in LlamaChatContentFormatter.SUPPORTED_ROLES
):
return {
"role": message.role,
@@ -84,7 +63,7 @@ class CustomOpenAIChatContentFormatter(ContentFormatterBase):
}
else:
supported = ",".join(
[role for role in CustomOpenAIChatContentFormatter.SUPPORTED_ROLES]
[role for role in LlamaChatContentFormatter.SUPPORTED_ROLES]
)
raise ValueError(
f"""Received unsupported role.
@@ -93,7 +72,7 @@ class CustomOpenAIChatContentFormatter(ContentFormatterBase):
@property
def supported_api_types(self) -> List[AzureMLEndpointApiType]:
return [AzureMLEndpointApiType.dedicated, AzureMLEndpointApiType.serverless]
return [AzureMLEndpointApiType.realtime, AzureMLEndpointApiType.serverless]
def format_messages_request_payload(
self,
@@ -103,13 +82,10 @@ class CustomOpenAIChatContentFormatter(ContentFormatterBase):
) -> bytes:
"""Formats the request according to the chosen api"""
chat_messages = [
CustomOpenAIChatContentFormatter._convert_message_to_dict(message)
LlamaChatContentFormatter._convert_message_to_dict(message)
for message in messages
]
if api_type in [
AzureMLEndpointApiType.dedicated,
AzureMLEndpointApiType.realtime,
]:
if api_type == AzureMLEndpointApiType.realtime:
request_payload = json.dumps(
{
"input_data": {
@@ -129,13 +105,10 @@ class CustomOpenAIChatContentFormatter(ContentFormatterBase):
def format_response_payload(
self,
output: bytes,
api_type: AzureMLEndpointApiType = AzureMLEndpointApiType.dedicated,
api_type: AzureMLEndpointApiType = AzureMLEndpointApiType.realtime,
) -> ChatGeneration:
"""Formats response"""
if api_type in [
AzureMLEndpointApiType.dedicated,
AzureMLEndpointApiType.realtime,
]:
if api_type == AzureMLEndpointApiType.realtime:
try:
choice = json.loads(output)["output"]
except (KeyError, IndexError, TypeError) as e:
@@ -170,20 +143,6 @@ class CustomOpenAIChatContentFormatter(ContentFormatterBase):
raise ValueError(f"`api_type` {api_type} is not supported by this formatter")
class LlamaChatContentFormatter(CustomOpenAIChatContentFormatter):
"""Deprecated: Kept for backwards compatibility
Chat Content formatter for Llama."""
def __init__(self) -> None:
super().__init__()
warnings.warn(
"""`LlamaChatContentFormatter` will be deprecated in the future.
Please use `CustomOpenAIChatContentFormatter` instead.
"""
)
class MistralChatContentFormatter(LlamaChatContentFormatter):
"""Content formatter for `Mistral`."""
@@ -228,8 +187,8 @@ class AzureMLChatOnlineEndpoint(BaseChatModel, AzureMLBaseEndpoint):
Example:
.. code-block:: python
azure_llm = AzureMLOnlineEndpoint(
endpoint_url="https://<your-endpoint>.<your_region>.inference.ml.azure.com/v1/chat/completions",
endpoint_api_type=AzureMLApiType.serverless,
endpoint_url="https://<your-endpoint>.<your_region>.inference.ml.azure.com/score",
endpoint_api_type=AzureMLApiType.realtime,
endpoint_api_key="my-api-key",
content_formatter=chat_content_formatter,
)
@@ -280,143 +239,3 @@ class AzureMLChatOnlineEndpoint(BaseChatModel, AzureMLBaseEndpoint):
response_payload, self.endpoint_api_type
)
return ChatResult(generations=[generations])
def _stream(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[ChatGenerationChunk]:
self.endpoint_url = self.endpoint_url.replace("/chat/completions", "")
timeout = None if "timeout" not in kwargs else kwargs["timeout"]
import openai
params = {}
client_params = {
"api_key": self.endpoint_api_key.get_secret_value(),
"base_url": self.endpoint_url,
"timeout": timeout,
"default_headers": None,
"default_query": None,
"http_client": None,
}
client = openai.OpenAI(**client_params)
message_dicts = [
CustomOpenAIChatContentFormatter._convert_message_to_dict(m)
for m in messages
]
params = {"stream": True, "stop": stop, "model": None, **kwargs}
default_chunk_class = AIMessageChunk
for chunk in client.chat.completions.create(messages=message_dicts, **params):
if not isinstance(chunk, dict):
chunk = chunk.dict()
if len(chunk["choices"]) == 0:
continue
choice = chunk["choices"][0]
chunk = _convert_delta_to_message_chunk(
choice["delta"], default_chunk_class
)
generation_info = {}
if finish_reason := choice.get("finish_reason"):
generation_info["finish_reason"] = finish_reason
logprobs = choice.get("logprobs")
if logprobs:
generation_info["logprobs"] = logprobs
default_chunk_class = chunk.__class__
chunk = ChatGenerationChunk(
message=chunk, generation_info=generation_info or None
)
if run_manager:
run_manager.on_llm_new_token(chunk.text, chunk=chunk, logprobs=logprobs)
yield chunk
async def _astream(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> AsyncIterator[ChatGenerationChunk]:
self.endpoint_url = self.endpoint_url.replace("/chat/completions", "")
timeout = None if "timeout" not in kwargs else kwargs["timeout"]
import openai
params = {}
client_params = {
"api_key": self.endpoint_api_key.get_secret_value(),
"base_url": self.endpoint_url,
"timeout": timeout,
"default_headers": None,
"default_query": None,
"http_client": None,
}
async_client = openai.AsyncOpenAI(**client_params)
message_dicts = [
CustomOpenAIChatContentFormatter._convert_message_to_dict(m)
for m in messages
]
params = {"stream": True, "stop": stop, "model": None, **kwargs}
default_chunk_class = AIMessageChunk
async for chunk in await async_client.chat.completions.create(
messages=message_dicts, **params
):
if not isinstance(chunk, dict):
chunk = chunk.dict()
if len(chunk["choices"]) == 0:
continue
choice = chunk["choices"][0]
chunk = _convert_delta_to_message_chunk(
choice["delta"], default_chunk_class
)
generation_info = {}
if finish_reason := choice.get("finish_reason"):
generation_info["finish_reason"] = finish_reason
logprobs = choice.get("logprobs")
if logprobs:
generation_info["logprobs"] = logprobs
default_chunk_class = chunk.__class__
chunk = ChatGenerationChunk(
message=chunk, generation_info=generation_info or None
)
if run_manager:
await run_manager.on_llm_new_token(
token=chunk.text, chunk=chunk, logprobs=logprobs
)
yield chunk
def _convert_delta_to_message_chunk(
_dict: Mapping[str, Any], default_class: Type[BaseMessageChunk]
) -> BaseMessageChunk:
role = cast(str, _dict.get("role"))
content = cast(str, _dict.get("content") or "")
additional_kwargs: Dict = {}
if _dict.get("function_call"):
function_call = dict(_dict["function_call"])
if "name" in function_call and function_call["name"] is None:
function_call["name"] = ""
additional_kwargs["function_call"] = function_call
if _dict.get("tool_calls"):
additional_kwargs["tool_calls"] = _dict["tool_calls"]
if role == "user" or default_class == HumanMessageChunk:
return HumanMessageChunk(content=content)
elif role == "assistant" or default_class == AIMessageChunk:
return AIMessageChunk(content=content, additional_kwargs=additional_kwargs)
elif role == "system" or default_class == SystemMessageChunk:
return SystemMessageChunk(content=content)
elif role == "function" or default_class == FunctionMessageChunk:
return FunctionMessageChunk(content=content, name=_dict["name"])
elif role == "tool" or default_class == ToolMessageChunk:
return ToolMessageChunk(content=content, tool_call_id=_dict["tool_call_id"])
elif role or default_class == ChatMessageChunk:
return ChatMessageChunk(content=content, role=role)
else:
return default_class(content=content)
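An illustrative check of the delta-to-chunk conversion above (the delta dict is a made-up example of an OpenAI-style streaming payload):

```python
chunk = _convert_delta_to_message_chunk(
    {"role": "assistant", "content": "Hi"}, AIMessageChunk
)
assert isinstance(chunk, AIMessageChunk) and chunk.content == "Hi"
```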

View File

@@ -271,7 +271,6 @@ class QianfanChatEndpoint(BaseChatModel):
},
)
params = self._convert_prompt_msg_params(messages, **kwargs)
params["stop"] = stop
response_payload = self.client.do(**params)
lc_msg = _convert_dict_to_message(response_payload)
gen = ChatGeneration(
@@ -317,7 +316,6 @@ class QianfanChatEndpoint(BaseChatModel):
},
)
params = self._convert_prompt_msg_params(messages, **kwargs)
params["stop"] = stop
response_payload = await self.client.ado(**params)
lc_msg = _convert_dict_to_message(response_payload)
generations = []
@@ -341,7 +339,6 @@ class QianfanChatEndpoint(BaseChatModel):
**kwargs: Any,
) -> Iterator[ChatGenerationChunk]:
params = self._convert_prompt_msg_params(messages, **kwargs)
params["stop"] = stop
params["stream"] = True
for res in self.client.do(**params):
if res:
@@ -368,7 +365,6 @@ class QianfanChatEndpoint(BaseChatModel):
**kwargs: Any,
) -> AsyncIterator[ChatGenerationChunk]:
params = self._convert_prompt_msg_params(messages, **kwargs)
params["stop"] = stop
params["stream"] = True
async for res in await self.client.ado(**params):
if res:

View File

@@ -1,5 +1,4 @@
import re
from collections import defaultdict
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
from langchain_core.callbacks import (
@@ -235,9 +234,10 @@ class BedrockChat(BaseChatModel, BedrockBase):
**kwargs: Any,
) -> Iterator[ChatGenerationChunk]:
provider = self._get_provider()
prompt, system, formatted_messages = None, None, None
system = None
formatted_messages = None
if provider == "anthropic":
prompt = None
system, formatted_messages = ChatPromptAdapter.format_messages(
provider, messages
)
@@ -265,17 +265,17 @@ class BedrockChat(BaseChatModel, BedrockBase):
**kwargs: Any,
) -> ChatResult:
completion = ""
llm_output: Dict[str, Any] = {"model_id": self.model_id}
if self.streaming:
for chunk in self._stream(messages, stop, run_manager, **kwargs):
completion += chunk.text
else:
provider = self._get_provider()
prompt, system, formatted_messages = None, None, None
system = None
formatted_messages = None
params: Dict[str, Any] = {**kwargs}
if provider == "anthropic":
prompt = None
system, formatted_messages = ChatPromptAdapter.format_messages(
provider, messages
)
@@ -287,7 +287,7 @@ class BedrockChat(BaseChatModel, BedrockBase):
if stop:
params["stop_sequences"] = stop
completion, usage_info = self._prepare_input_and_invoke(
completion = self._prepare_input_and_invoke(
prompt=prompt,
stop=stop,
run_manager=run_manager,
@@ -296,25 +296,10 @@ class BedrockChat(BaseChatModel, BedrockBase):
**params,
)
llm_output["usage"] = usage_info
return ChatResult(
generations=[ChatGeneration(message=AIMessage(content=completion))],
llm_output=llm_output,
generations=[ChatGeneration(message=AIMessage(content=completion))]
)
def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> dict:
final_usage: Dict[str, int] = defaultdict(int)
final_output = {}
for output in llm_outputs:
output = output or {}
usage = output.pop("usage", {})
for token_type, token_count in usage.items():
final_usage[token_type] += token_count
final_output.update(output)
final_output["usage"] = final_usage
return final_output
def get_num_tokens(self, text: str) -> int:
if self._model_is_anthropic:
return get_num_tokens_anthropic(text)
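To see what the `_combine_llm_outputs` logic above produces, a standalone sketch of the same aggregation (the usage dicts are illustrative):

```python
from collections import defaultdict
from typing import Dict, List, Optional


def combine_llm_outputs(llm_outputs: List[Optional[dict]]) -> dict:
    # Mirrors BedrockChat._combine_llm_outputs: merge outputs, summing token counts.
    final_usage: Dict[str, int] = defaultdict(int)
    final_output = {}
    for output in llm_outputs:
        output = dict(output or {})
        usage = output.pop("usage", {})
        for token_type, token_count in usage.items():
            final_usage[token_type] += token_count
        final_output.update(output)
    final_output["usage"] = final_usage
    return final_output


print(combine_llm_outputs([
    {"model_id": "anthropic.claude-v2", "usage": {"prompt_tokens": 10, "completion_tokens": 5}},
    {"model_id": "anthropic.claude-v2", "usage": {"prompt_tokens": 3, "completion_tokens": 7}},
]))
# {'model_id': 'anthropic.claude-v2',
#  'usage': defaultdict(<class 'int'>, {'prompt_tokens': 13, 'completion_tokens': 12})}
```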

View File

@@ -172,9 +172,9 @@ class ChatCohere(BaseChatModel, BaseCohere):
request = get_cohere_chat_request(messages, **self._default_params, **kwargs)
if hasattr(self.async_client, "chat_stream"): # detect and support sdk v5
stream = await self.async_client.chat_stream(**request)
stream = self.async_client.chat_stream(**request)
else:
stream = await self.async_client.chat(**request, stream=True)
stream = self.async_client.chat(**request, stream=True)
async for data in stream:
if data.event_type == "text-generation":

View File

@@ -12,7 +12,7 @@ from langchain_core.messages import (
BaseMessage,
HumanMessage,
)
from langchain_core.outputs import ChatGeneration, ChatResult
from langchain_core.outputs import ChatResult
from langchain_community.llms.minimax import MinimaxCommon
from langchain_community.llms.utils import enforce_stop_tokens
@@ -81,8 +81,7 @@ class MiniMaxChat(MinimaxCommon, BaseChatModel):
text = self._client.post(payload)
# This is required since the stop are not enforced by the model parameters
text = text if stop is None else enforce_stop_tokens(text, stop)
return ChatResult(generations=[ChatGeneration(message=AIMessage(text))])
return text if stop is None else enforce_stop_tokens(text, stop)
async def _agenerate(
self,

View File

@@ -1,58 +0,0 @@
"""Wrapper around Moonshot chat models."""
from typing import Dict
from langchain_core.pydantic_v1 import root_validator
from langchain_core.utils import get_from_dict_or_env
from langchain_community.chat_models import ChatOpenAI
from langchain_community.llms.moonshot import MOONSHOT_SERVICE_URL_BASE, MoonshotCommon
class MoonshotChat(MoonshotCommon, ChatOpenAI):
"""Wrapper around Moonshot large language models.
To use, you should have the ``openai`` python package installed, and the
environment variable ``MOONSHOT_API_KEY`` set with your API key.
(Moonshot's chat API is compatible with OpenAI's SDK.)
Referenced from https://platform.moonshot.cn/docs
Example:
.. code-block:: python
from langchain_community.chat_models.moonshot import MoonshotChat
moonshot = MoonshotChat(model="moonshot-v1-8k")
"""
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that the environment is set up correctly."""
values["moonshot_api_key"] = get_from_dict_or_env(
values, "moonshot_api_key", "MOONSHOT_API_KEY"
)
try:
import openai
except ImportError:
raise ImportError(
"Could not import openai python package. "
"Please install it with `pip install openai`."
)
client_params = {
"api_key": values["moonshot_api_key"],
"base_url": values["base_url"]
if "base_url" in values
else MOONSHOT_SERVICE_URL_BASE,
}
if not values.get("client"):
values["client"] = openai.OpenAI(**client_params).chat.completions
if not values.get("async_client"):
values["async_client"] = openai.AsyncOpenAI(
**client_params
).chat.completions
return values

View File

@@ -1,56 +0,0 @@
"""Wrapper around Solar chat models."""
from typing import Dict
from langchain_core.pydantic_v1 import root_validator
from langchain_core.utils import get_from_dict_or_env
from langchain_community.chat_models import ChatOpenAI
from langchain_community.llms.solar import SOLAR_SERVICE_URL_BASE, SolarCommon
class SolarChat(SolarCommon, ChatOpenAI):
"""Wrapper around Solar large language models.
To use, you should have the ``openai`` python package installed, and the
environment variable ``SOLAR_API_KEY`` set with your API key.
(Solar's chat API is compatible with OpenAI's SDK.)
Referenced from https://console.upstage.ai/services/solar
Example:
.. code-block:: python
from langchain_community.chat_models.solar import SolarChat
solar = SolarChat(model="solar-1-mini-chat")
"""
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that the environment is set up correctly."""
values["solar_api_key"] = get_from_dict_or_env(
values, "solar_api_key", "SOLAR_API_KEY"
)
try:
import openai
except ImportError:
raise ImportError(
"Could not import openai python package. "
"Please install it with `pip install openai`."
)
client_params = {
"api_key": values["solar_api_key"],
"base_url": values["base_url"]
if "base_url" in values
else SOLAR_SERVICE_URL_BASE,
}
if not values.get("client"):
values["client"] = openai.OpenAI(**client_params).chat.completions
if not values.get("async_client"):
values["async_client"] = openai.AsyncOpenAI(
**client_params
).chat.completions
return values

View File

@@ -102,7 +102,6 @@ _module_lookup = {
"JoplinLoader": "langchain_community.document_loaders.joplin",
"LakeFSLoader": "langchain_community.document_loaders.lakefs",
"LarkSuiteDocLoader": "langchain_community.document_loaders.larksuite",
"LLMSherpaFileLoader": "langchain_community.document_loaders.llmsherpa",
"MHTMLLoader": "langchain_community.document_loaders.mhtml",
"MWDumpLoader": "langchain_community.document_loaders.mediawikidump",
"MastodonTootsLoader": "langchain_community.document_loaders.mastodon",

View File

@@ -8,55 +8,20 @@ from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
# Pre-compile regular expressions for video ID extraction
BV_PATTERN = re.compile(r"BV\w+")
AV_PATTERN = re.compile(r"av[0-9]+")
class BiliBiliLoader(BaseLoader):
"""
Loader for fetching transcripts from BiliBili videos.
"""
"""Load `BiliBili` video transcripts."""
def __init__(
self,
video_urls: List[str],
sessdata: str = "",
bili_jct: str = "",
buvid3: str = "",
):
"""
Initialize the loader with BiliBili video URLs and authentication cookies.
if no authentication cookies are provided, the loader can't get transcripts
and will only fetch videos info.
def __init__(self, video_urls: List[str]):
"""Initialize with bilibili url.
Args:
video_urls (List[str]): List of BiliBili video URLs.
sessdata (str): SESSDATA cookie value for authentication.
bili_jct (str): BILI_JCT cookie value for authentication.
buvid3 (str): BUVI3 cookie value for authentication.
video_urls: List of bilibili urls.
"""
self.video_urls = video_urls
self.credential = None
try:
from bilibili_api import video
except ImportError:
raise ImportError(
"requests package not found, please install it with "
"`pip install bilibili-api-python`"
)
if sessdata and bili_jct and buvid3:
self.credential = video.Credential(
sessdata=sessdata, bili_jct=bili_jct, buvid3=buvid3
)
def load(self) -> List[Document]:
"""
Load and return a list of documents containing video transcripts.
Returns:
List[Document]: List of Document objects transcripts and metadata.
"""
"""Load Documents from bilibili url."""
results = []
for url in self.video_urls:
transcript, video_info = self._get_bilibili_subs_and_info(url)
@@ -66,10 +31,6 @@ class BiliBiliLoader(BaseLoader):
return results
def _get_bilibili_subs_and_info(self, url: str) -> Tuple[str, dict]:
"""
Retrieve video information and transcript for a given BiliBili URL.
"""
bvid = BV_PATTERN.search(url)
try:
from bilibili_api import sync, video
except ImportError:
@@ -77,50 +38,46 @@ class BiliBiliLoader(BaseLoader):
"requests package not found, please install it with "
"`pip install bilibili-api-python`"
)
if bvid:
v = video.Video(bvid=bvid.group(), credential=self.credential)
bvid = re.search(r"BV\w+", url)
if bvid is not None:
v = video.Video(bvid=bvid.group())
else:
aid = AV_PATTERN.search(url)
if aid:
v = video.Video(aid=int(aid.group()[2:]), credential=self.credential)
aid = re.search(r"av[0-9]+", url)
if aid is not None:
try:
v = video.Video(aid=int(aid.group()[2:]))
except AttributeError:
raise ValueError(f"{url} is not bilibili url.")
else:
raise ValueError(f"Unable to find a valid video ID in URL: {url}")
raise ValueError(f"{url} is not bilibili url.")
video_info = sync(v.get_info())
video_info.update({"url": url})
# Return if no credential is provided
if not self.credential:
return "", video_info
# Fetching and processing subtitles
sub = sync(v.get_subtitle(video_info["cid"]))
sub_list = sub.get("subtitles", [])
# Get subtitle url
sub_list = sub["subtitles"]
if sub_list:
sub_url = sub_list[0].get("subtitle_url", "")
sub_url = sub_list[0]["subtitle_url"]
if not sub_url.startswith("http"):
sub_url = "https:" + sub_url
result = requests.get(sub_url)
raw_sub_titles = json.loads(result.content)["body"]
raw_transcript = " ".join([c["content"] for c in raw_sub_titles])
response = requests.get(sub_url)
if response.status_code == 200:
raw_sub_titles = json.loads(response.content).get("body", [])
raw_transcript = " ".join([c["content"] for c in raw_sub_titles])
raw_transcript_with_meta_info = (
f"Video Title: {video_info['title']}, "
f"description: {video_info['desc']}\n\n"
f"Transcript: {raw_transcript}"
)
return raw_transcript_with_meta_info, video_info
else:
warnings.warn(
f"Failed to fetch subtitles for {url}. "
f"HTTP Status Code: {response.status_code}"
)
else:
warnings.warn(
f"No subtitles found for video: {url}. Returning empty transcript."
raw_transcript_with_meta_info = (
f"Video Title: {video_info['title']},"
f"description: {video_info['desc']}\n\n"
f"Transcript: {raw_transcript}"
)
# Return empty transcript if no subtitles are found
return "", video_info
return raw_transcript_with_meta_info, video_info
else:
raw_transcript = ""
warnings.warn(
f"""
No subtitles found for video: {url}.
Return Empty transcript.
"""
)
return raw_transcript, video_info
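A hedged usage sketch for the credential-aware loader (the URL and cookie values are placeholders):

```python
from langchain_community.document_loaders import BiliBiliLoader

loader = BiliBiliLoader(
    video_urls=["https://www.bilibili.com/video/BV1xx411c7mD"],  # placeholder BV id
    sessdata="<SESSDATA>",  # without all three cookies, only video info is returned
    bili_jct="<BILI_JCT>",
    buvid3="<BUVID3>",
)
docs = loader.load()
```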

View File

@@ -2,18 +2,17 @@ import concurrent
import logging
import random
from pathlib import Path
from typing import Any, Callable, Iterator, List, Optional, Sequence, Type, Union
from typing import Any, List, Optional, Sequence, Type, Union
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders.html_bs import BSHTMLLoader
from langchain_community.document_loaders.text import TextLoader
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
FILE_LOADER_TYPE = Union[
Type[UnstructuredFileLoader], Type[TextLoader], Type[BSHTMLLoader], Type[CSVLoader]
Type[UnstructuredFileLoader], Type[TextLoader], Type[BSHTMLLoader]
]
logger = logging.getLogger(__name__)
@@ -112,18 +111,44 @@ class DirectoryLoader(BaseLoader):
self.randomize_sample = randomize_sample
self.sample_seed = sample_seed
def load_file(
self, item: Path, path: Path, docs: List[Document], pbar: Optional[Any]
) -> None:
"""Load a file.
Args:
item: File path.
path: Directory path.
docs: List of documents to append to.
pbar: Progress bar. Defaults to None.
"""
if item.is_file():
if _is_visible(item.relative_to(path)) or self.load_hidden:
try:
logger.debug(f"Processing file: {str(item)}")
sub_docs = self.loader_cls(str(item), **self.loader_kwargs).load()
docs.extend(sub_docs)
except Exception as e:
if self.silent_errors:
logger.warning(f"Error loading file {str(item)}: {e}")
else:
logger.error(f"Error loading file {str(item)}")
raise e
finally:
if pbar:
pbar.update(1)
def load(self) -> List[Document]:
"""Load documents."""
return list(self.lazy_load())
def lazy_load(self) -> Iterator[Document]:
"""Load documents lazily."""
p = Path(self.path)
if not p.exists():
raise FileNotFoundError(f"Directory not found: '{self.path}'")
if not p.is_dir():
raise ValueError(f"Expected directory, got file: '{self.path}'")
docs: List[Document] = []
paths = p.rglob(self.glob) if self.recursive else p.glob(self.glob)
items = [
path
@@ -160,62 +185,15 @@ class DirectoryLoader(BaseLoader):
)
if self.use_multithreading:
futures = []
with concurrent.futures.ThreadPoolExecutor(
max_workers=self.max_concurrency
) as executor:
for i in items:
futures.append(
executor.submit(
self._lazy_load_file_to_non_generator(self._lazy_load_file),
i,
p,
pbar,
)
)
for future in concurrent.futures.as_completed(futures):
yield future.result()
executor.map(lambda i: self.load_file(i, p, docs, pbar), items)
else:
for i in items:
yield from self._lazy_load_file(i, p, pbar)
self.load_file(i, p, docs, pbar)
if pbar:
pbar.close()
def _lazy_load_file_to_non_generator(self, func: Callable) -> Callable:
def non_generator(item: Path, path: Path, pbar: Optional[Any]) -> List:
return [x for x in func(item, path, pbar)]
return non_generator
def _lazy_load_file(
self, item: Path, path: Path, pbar: Optional[Any]
) -> Iterator[Document]:
"""Load a file.
Args:
item: File path.
path: Directory path.
pbar: Progress bar. Defaults to None.
"""
if item.is_file():
if _is_visible(item.relative_to(path)) or self.load_hidden:
try:
logger.debug(f"Processing file: {str(item)}")
loader = self.loader_cls(str(item), **self.loader_kwargs)
try:
for subdoc in loader.lazy_load():
yield subdoc
except NotImplementedError:
for subdoc in loader.load():
yield subdoc
except Exception as e:
if self.silent_errors:
logger.warning(f"Error loading file {str(item)}: {e}")
else:
logger.error(f"Error loading file {str(item)}")
raise e
finally:
if pbar:
pbar.update(1)
return docs
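The hunks above replace the generator-based `_lazy_load_file` path with an eager `load_file` that appends into a shared `docs` list, with `executor.map` handling the multithreaded branch. A minimal usage sketch of the eager API as shown here; the directory path and glob are illustrative:

from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders.text import TextLoader

# Illustrative path and glob; loader_cls/loader_kwargs are forwarded per file.
loader = DirectoryLoader(
    "./docs",
    glob="**/*.txt",
    loader_cls=TextLoader,
    use_multithreading=True,  # exercises the ThreadPoolExecutor branch above
    silent_errors=True,       # log and skip failing files instead of raising
)
docs = loader.load()  # eager: returns List[Document]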

View File

@@ -1,142 +0,0 @@
from pathlib import Path
from typing import Iterator, Union
from urllib.parse import urlparse
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
DEFAULT_API = "https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all"
class LLMSherpaFileLoader(BaseLoader):
"""Load Documents using `LLMSherpa`.
LLMSherpaFileLoader uses LayoutPDFReader, which is part of the LLMSherpa library.
This tool is designed to parse PDFs while preserving their layout information,
which is often lost when using most PDF-to-text parsers.
Examples
--------
from langchain_community.document_loaders.llmsherpa import LLMSherpaFileLoader
loader = LLMSherpaFileLoader(
"example.pdf",
strategy="chunks",
llmsherpa_api_url="http://localhost:5010/api/parseDocument?renderFormat=all",
)
docs = loader.load()
"""
def __init__(
self,
file_path: Union[str, Path],
new_indent_parser: bool = True,
apply_ocr: bool = True,
strategy: str = "chunks",
llmsherpa_api_url: str = DEFAULT_API,
):
"""Initialize with a file path."""
try:
import llmsherpa # noqa:F401
except ImportError:
raise ImportError(
"llmsherpa package not found, please install it with "
"`pip install llmsherpa`"
)
_valid_strategies = ["sections", "chunks", "html", "text"]
if strategy not in _valid_strategies:
raise ValueError(
f"Got {strategy} for `strategy`, "
f"but should be one of `{_valid_strategies}`"
)
# validate llmsherpa url
if not self._is_valid_url(llmsherpa_api_url):
raise ValueError(f"Invalid URL: {llmsherpa_api_url}")
self.url = self._validate_llmsherpa_url(
url=llmsherpa_api_url,
new_indent_parser=new_indent_parser,
apply_ocr=apply_ocr,
)
self.strategy = strategy
self.file_path = str(file_path)
@staticmethod
def _is_valid_url(url: str) -> bool:
"""Check if the url is valid."""
parsed = urlparse(url)
return bool(parsed.netloc) and bool(parsed.scheme)
@staticmethod
def _validate_llmsherpa_url(
url: str, new_indent_parser: bool = True, apply_ocr: bool = True
) -> str:
"""Check if the llmsherpa url is valid."""
parsed = urlparse(url)
valid_url = url
if ("/api/parseDocument" not in parsed.path) and (
"/api/document/developer/parseDocument" not in parsed.path
):
raise ValueError(f"Invalid LLMSherpa URL: {url}")
if "renderFormat=all" not in parsed.query:
valid_url = valid_url + "?renderFormat=all"
if new_indent_parser and "useNewIndentParser=true" not in parsed.query:
valid_url = valid_url + "&useNewIndentParser=true"
if apply_ocr and "applyOcr=yes" not in parsed.query:
valid_url = valid_url + "&applyOcr=yes"
return valid_url
def lazy_load(
self,
) -> Iterator[Document]:
"""Load file."""
from llmsherpa.readers import LayoutPDFReader
docs_reader = LayoutPDFReader(self.url)
doc = docs_reader.read_pdf(self.file_path)
if self.strategy == "sections":
yield from [
Document(
page_content=section.to_text(include_children=True, recurse=True),
metadata={
"source": self.file_path,
"section_number": section_num,
"section_title": section.title,
},
)
for section_num, section in enumerate(doc.sections())
]
if self.strategy == "chunks":
yield from [
Document(
page_content=chunk.to_context_text(),
metadata={
"source": self.file_path,
"chunk_number": chunk_num,
"chunk_type": chunk.tag,
},
)
for chunk_num, chunk in enumerate(doc.chunks())
]
if self.strategy == "html":
yield from [
Document(
page_content=doc.to_html(),
metadata={
"source": self.file_path,
},
)
]
if self.strategy == "text":
yield from [
Document(
page_content=doc.to_text(),
metadata={
"source": self.file_path,
},
)
]
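Note that `_validate_llmsherpa_url` normalizes the endpoint by appending `renderFormat=all`, `useNewIndentParser=true`, and `applyOcr=yes` when they are missing, and `lazy_load` dispatches on the four strategies. A minimal sketch, assuming a locally running llmsherpa server; the URL and file name are illustrative:

loader = LLMSherpaFileLoader(
    "example.pdf",
    strategy="sections",  # one of: sections, chunks, html, text
    llmsherpa_api_url="http://localhost:5010/api/parseDocument?renderFormat=all",
)
for doc in loader.lazy_load():
    # "sections" documents carry section_number/section_title metadata
    print(doc.metadata["section_title"], len(doc.page_content))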

View File

@@ -25,11 +25,7 @@ def concatenate_cells(
"""
cell_type = cell["cell_type"]
source = cell["source"]
if include_outputs:
try:
output = cell["outputs"]
except KeyError:
pass
output = cell["outputs"]
if include_outputs and cell_type == "code" and output:
if "ename" in output[0].keys():
@@ -62,13 +58,14 @@ def concatenate_cells(
def remove_newlines(x: Any) -> Any:
"""Recursively remove newlines, no matter the data structure they are stored in."""
import pandas as pd
if isinstance(x, str):
return x.replace("\n", "")
elif isinstance(x, list):
return [remove_newlines(elem) for elem in x]
elif isinstance(x, dict):
return {k: remove_newlines(v) for (k, v) in x.items()}
elif isinstance(x, pd.DataFrame):
return x.applymap(remove_newlines)
else:
return x
@@ -107,29 +104,29 @@ class NotebookLoader(BaseLoader):
self,
) -> List[Document]:
"""Load documents."""
try:
import pandas as pd
except ImportError:
raise ImportError(
"pandas is needed for Notebook Loader, "
"please install with `pip install pandas`"
)
p = Path(self.file_path)
with open(p, encoding="utf8") as f:
d = json.load(f)
filtered_data = [
{k: v for (k, v) in cell.items() if k in ["cell_type", "source", "outputs"]}
for cell in d["cells"]
]
data = pd.json_normalize(d["cells"])
filtered_data = data[["cell_type", "source", "outputs"]]
if self.remove_newline:
filtered_data = list(map(remove_newlines, filtered_data))
filtered_data = filtered_data.applymap(remove_newlines)
text = "".join(
list(
map(
lambda x: concatenate_cells(
x, self.include_outputs, self.max_output_length, self.traceback
),
filtered_data,
)
)
)
text = filtered_data.apply(
lambda x: concatenate_cells(
x, self.include_outputs, self.max_output_length, self.traceback
),
axis=1,
).str.cat(sep=" ")
metadata = {"source": str(p)}
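With the pandas path shown above, the selected cell fields are normalized into a DataFrame, `remove_newlines` is applied element-wise, and `concatenate_cells` is applied row-wise before joining with `.str.cat`. A minimal usage sketch; the notebook path is illustrative:

from langchain_community.document_loaders import NotebookLoader

loader = NotebookLoader(
    "analysis.ipynb",      # illustrative path
    include_outputs=True,  # feed cell outputs through concatenate_cells
    max_output_length=20,  # truncate long outputs
    remove_newline=True,   # applymap(remove_newlines) over the DataFrame
)
docs = loader.load()  # a single Document with {"source": ...} metadata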

View File

@@ -1,5 +1,4 @@
import logging
import os
import time
from typing import Dict, Iterator, Optional, Tuple
@@ -26,17 +25,10 @@ class OpenAIWhisperParser(BaseBlobParser):
"""
def __init__(
self,
api_key: Optional[str] = None,
*,
chunk_duration_threshold: float = 0.1,
base_url: Optional[str] = None,
self, api_key: Optional[str] = None, *, chunk_duration_threshold: float = 0.1
):
self.api_key = api_key
self.chunk_duration_threshold = chunk_duration_threshold
self.base_url = (
base_url if base_url is not None else os.environ.get("OPENAI_API_BASE")
)
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Lazily parse the blob."""
@@ -59,13 +51,11 @@ class OpenAIWhisperParser(BaseBlobParser):
if is_openai_v1():
# api_key optional, defaults to `os.environ['OPENAI_API_KEY']`
client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
client = openai.OpenAI(api_key=self.api_key)
else:
# Set the API key if provided
if self.api_key:
openai.api_key = self.api_key
if self.base_url:
openai.base_url = self.base_url
# Audio file from disk
audio = AudioSegment.from_file(blob.path)
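With `base_url` removed, the v1 client falls back to its own defaults (`OPENAI_API_KEY` and the standard API endpoint). A minimal sketch of driving the parser directly; the audio file is illustrative, and pydub plus an OpenAI key are assumed:

from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers.audio import OpenAIWhisperParser

parser = OpenAIWhisperParser(chunk_duration_threshold=0.1)
blob = Blob.from_path("speech.mp3")  # illustrative file
for doc in parser.lazy_parse(blob):  # yields one Document per audio chunk
    print(doc.page_content[:80])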

View File

@@ -670,7 +670,7 @@ class AmazonTextractPDFLoader(BasePDFLoader):
raise ValueError(
"Could not load credentials to authenticate with AWS client. "
"Please check that credentials in the specified "
f"profile name are valid. {e}"
"profile name are valid."
) from e
self.parser = AmazonTextractPDFParser(
textract_features=features,
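The hunk drops the interpolated exception text from the user-facing message while `from e` keeps the original error chained as `__cause__`. A minimal sketch of the pattern, with an illustrative boto3 call:

import boto3

try:
    # Illustrative: the real loader builds the client from its own kwargs.
    client = boto3.Session(profile_name="default").client("textract")
except Exception as e:
    raise ValueError(
        "Could not load credentials to authenticate with AWS client. "
        "Please check that credentials in the specified "
        "profile name are valid."
    ) from e  # chained cause preserves the original traceback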

View File

@@ -18,7 +18,6 @@ from typing import Any
_module_lookup = {
"AlephAlphaAsymmetricSemanticEmbedding": "langchain_community.embeddings.aleph_alpha", # noqa: E501
"AlephAlphaSymmetricSemanticEmbedding": "langchain_community.embeddings.aleph_alpha", # noqa: E501
"AnyscaleEmbeddings": "langchain_community.embeddings.anyscale",
"AwaEmbeddings": "langchain_community.embeddings.awa",
"AzureOpenAIEmbeddings": "langchain_community.embeddings.azure_openai",
"BaichuanTextEmbeddings": "langchain_community.embeddings.baichuan",
@@ -67,8 +66,6 @@ _module_lookup = {
"OctoAIEmbeddings": "langchain_community.embeddings.octoai_embeddings",
"OllamaEmbeddings": "langchain_community.embeddings.ollama",
"OpenAIEmbeddings": "langchain_community.embeddings.openai",
"OpenVINOEmbeddings": "langchain_community.embeddings.openvino",
"OpenVINOBgeEmbeddings": "langchain_community.embeddings.openvino",
"QianfanEmbeddingsEndpoint": "langchain_community.embeddings.baidu_qianfan_endpoint", # noqa: E501
"QuantizedBgeEmbeddings": "langchain_community.embeddings.itrex",
"QuantizedBiEncoderEmbeddings": "langchain_community.embeddings.optimum_intel",
@@ -77,7 +74,6 @@ _module_lookup = {
"SelfHostedHuggingFaceEmbeddings": "langchain_community.embeddings.self_hosted_hugging_face", # noqa: E501
"SelfHostedHuggingFaceInstructEmbeddings": "langchain_community.embeddings.self_hosted_hugging_face", # noqa: E501
"SentenceTransformerEmbeddings": "langchain_community.embeddings.sentence_transformer", # noqa: E501
"SolarEmbeddings": "langchain_community.embeddings.solar",
"SpacyEmbeddings": "langchain_community.embeddings.spacy_embeddings",
"SparkLLMTextEmbeddings": "langchain_community.embeddings.sparkllm",
"TensorflowHubEmbeddings": "langchain_community.embeddings.tensorflow_hub",

View File

@@ -1,75 +0,0 @@
"""Anyscale embeddings wrapper."""
from __future__ import annotations
from typing import Dict
from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.utils.openai import is_openai_v1
DEFAULT_API_BASE = "https://api.endpoints.anyscale.com/v1"
DEFAULT_MODEL = "thenlper/gte-large"
class AnyscaleEmbeddings(OpenAIEmbeddings):
"""`Anyscale` Embeddings API."""
anyscale_api_key: SecretStr = Field(default=None)
"""AnyScale Endpoints API keys."""
model: str = Field(default=DEFAULT_MODEL)
"""Model name to use."""
anyscale_api_base: str = Field(default=DEFAULT_API_BASE)
"""Base URL path for API requests."""
tiktoken_enabled: bool = False
"""Set this to False for non-OpenAI implementations of the embeddings API"""
embedding_ctx_length: int = 500
"""The maximum number of tokens to embed at once."""
@property
def lc_secrets(self) -> Dict[str, str]:
return {
"anyscale_api_key": "ANYSCALE_API_KEY",
}
@root_validator()
def validate_environment(cls, values: dict) -> dict:
"""Validate that api key and python package exists in environment."""
values["anyscale_api_key"] = convert_to_secret_str(
get_from_dict_or_env(
values,
"anyscale_api_key",
"ANYSCALE_API_KEY",
)
)
values["anyscale_api_base"] = get_from_dict_or_env(
values,
"anyscale_api_base",
"ANYSCALE_API_BASE",
default=DEFAULT_API_BASE,
)
try:
import openai
except ImportError:
raise ImportError(
"Could not import openai python package. "
"Please install it with `pip install openai`."
)
if is_openai_v1():
# For backwards compatibility.
client_params = {
"api_key": values["anyscale_api_key"].get_secret_value(),
"base_url": values["anyscale_api_base"],
}
values["client"] = openai.OpenAI(**client_params).embeddings
else:
values["openai_api_base"] = values["anyscale_api_base"]
values["openai_api_key"] = values["anyscale_api_key"].get_secret_value()
values["client"] = openai.Embedding
return values
@property
def _llm_type(self) -> str:
return "anyscale-embedding"

Some files were not shown because too many files have changed in this diff.