Compare commits

...

85 Commits

Author SHA1 Message Date
Erick Friis
2be4252ef2 docs: algolia api key update 2024-05-14 16:22:49 -07:00
Erick Friis
024c11ff9c docs: v0.2 search index (#21619) 2024-05-14 15:37:42 -07:00
Bagatur
241a6e43a5 docs: update structured how to (#21679) 2024-05-14 22:19:51 +00:00
Jib
f369495fa0 mongodb: [performance] Increase DEFAULT_INSERT_BATCH_SIZE to 100,000 and introduce sizing constraints (#19608) 2024-05-14 22:11:26 +00:00
Eugene Yurtsev
e69a9bedf8 core[patch]: Update mypy config (#21684)
Update mypy config to ignore checking deps from numpy and pytest (which are optional in langsmith sdk)
2024-05-14 17:29:07 -04:00
Erick Friis
9973547aef mongodb: release 0.1.4 (#21678) 2024-05-14 11:54:23 -07:00
Jib
a97473c846 mongodb[patch]: Make ObjectId JSON-serializable on generation (#21394) 2024-05-14 11:52:29 -07:00
ccurme
12b599c47f docs: add how-to on multi-modal tool calling (#21667)
Can move this to a dedicated multi-modal section if desired.
2024-05-14 12:26:25 -04:00
Eugene Yurtsev
5c64c004cc core[patch]: Add unit tests with some streaming scenarios (#21668)
Add unit tests that show differences between sync / async versions when
streaming.

The inner on_chain_chunk event is missing when mixing sync and async
functionality, likely due to a missing `tap_output_iter` implementation on
the sync variant of `_transform_stream_with_config`.
2024-05-14 15:30:57 +00:00
Eugene Yurtsev
2ac4d2960c core[patch]: Add unit test to catch ordering (#21669)
Add unit test to catch ordering issues
2024-05-14 15:25:33 +00:00
ccurme
3390dc2266 docs: style nits (#21666) 2024-05-14 10:18:13 -04:00
ccurme
2463c8060c docs: how-to on adding scores to retriever results (#21626) 2024-05-14 09:41:36 -04:00
Zhao Blake
972d2071c6 core[patch]: Fix typo in VectorStoreExampleSelector doc-string (#21574) 2024-05-14 13:31:37 +00:00
William FH
714cba96a8 [docs] Update langgraph migration guide (#21644)
- Add links to references where appropriate
- Use `create_react_agent`
- Fix the timeout recommendation
2024-05-14 06:13:17 +00:00
Erick Friis
5144c94603 docs: add 0.2 search notice (#21653) 2024-05-14 04:00:18 +00:00
Erick Friis
2a984e8e3f docs: huggingface package (#21645) 2024-05-14 03:17:40 +00:00
Anush
cd1879f5e7 docs: Qdrant partner package reference (#21649)
## Description:
As the title goes.
2024-05-13 19:51:57 -07:00
Erick Friis
c77d2f2b06 multiple: core 0.2 nonbreaking dep, check_diff community->langchain dep (#21646)
0.2 is not a breaking release for core (but it is for langchain and
community)

To keep the core+langchain+community packages in sync at 0.2, we will
relax deps throughout the ecosystem to tolerate `langchain-core` 0.2
2024-05-13 19:50:36 -07:00
Anush
edd68e4ad4 qdrant: init package (#21146)
## Description

This PR introduces the new `langchain-qdrant` partner package, intending
to deprecate the community package.

## Changes

- Moved the Qdrant vector store implementation to `/libs/partners/qdrant`
with integration tests.
- The conditional imports of the client library are now regular imports,
with minor implementation improvements.
- Added a deprecation warning to
`langchain_community.vectorstores.qdrant.Qdrant`.
- Replaced references/imports from `langchain_community` with either
`langchain_core` or by moving the definitions to the `langchain_qdrant`
package itself.
- Updated the Qdrant vector store documentation to reflect the changes.

## Testing
- `QDRANT_URL` and
[`QDRANT_API_KEY`](583e36bf6b)
env values need to be set to [run integration
tests](d608c93d1f)
in the [cloud](https://cloud.qdrant.tech).
- If a Qdrant instance is running at `http://localhost:6333`, the
integration tests will use it too.
- By default, tests use an
[`in-memory`](https://github.com/qdrant/qdrant-client?tab=readme-ov-file#local-mode)
instance (not comprehensive).

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Erick Friis <erickfriis@gmail.com>
2024-05-13 18:20:03 -07:00
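As a rough usage sketch of the new package (assuming it mirrors the community `Qdrant` interface; the embedding model here is illustrative):

```python
# Minimal sketch; assumes `langchain_qdrant.Qdrant` mirrors the community API.
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import Qdrant

store = Qdrant.from_texts(
    ["LangChain now ships a dedicated Qdrant partner package."],
    OpenAIEmbeddings(),
    location=":memory:",  # local in-memory mode, as used by the default tests
    collection_name="demo",
)
docs = store.similarity_search("partner package", k=1)
```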
Erick Friis
fe8c9d621a docs: ignore nb echo:false blocks (#21624)
not working currently
2024-05-13 17:18:26 -07:00
Prashanth Rao
63c3a0e56c [community][graph]: Update KuzuQAChain and docs (#21218)
This PR makes some small updates for `KuzuQAChain` for graph QA.

- Updated the Cypher generation prompt (we now support `WHERE EXISTS`) and
generalized it further
- Support different LLMs for Cypher generation and QA
- Update docs and examples
2024-05-13 17:17:14 -07:00
Bagatur
752b1e85f8 docs: gh feedback link (#21606)
Co-authored-by: bracesproul <braceasproul@gmail.com>
2024-05-14 00:11:37 +00:00
Bagatur
506df439eb docs: how to index nits (#21623) 2024-05-13 23:52:50 +00:00
Bagatur
b514a479c0 docs: standardize capitalization (#21641) 2024-05-13 16:25:51 -07:00
Bagatur
89aae3e043 docs: add Techniques to Concepts (#21636)
- Adds Techniques section
- Moves function calling, retrieval types to Techniques
- Removes Installation section (not conceptual)
- Reorders a few things (chat models before llms, package descriptions
before diagram)
- Add text splitter types to Techniques
2024-05-13 16:06:16 -07:00
Tomaz Bratanic
89ff6a3d3b Add sentiment and confidence levels to diffbotgraphtransformer (#21590)
Co-authored-by: Erick Friis <erickfriis@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-05-13 23:00:52 +00:00
Bagatur
526ba235f3 docs: fix prereq links (#21630) 2024-05-13 15:40:53 -07:00
Erick Friis
0541e06e21 infra: 0.2 docs 404 page (#21634) 2024-05-13 22:11:28 +00:00
Erick Friis
e861b5bcb7 infra: fix api ref link generation (#21631) 2024-05-13 14:52:26 -07:00
Erick Friis
9b51ca08bc huggingface: fix community dep checking (#21628) 2024-05-13 21:52:18 +00:00
Erick Friis
91a2ea5cd6 chroma, mongodb: fix docstrings (#21629) 2024-05-13 21:27:43 +00:00
Jofthomas
afd85b60fc huggingface: init package (#21097)
First PR for the langchain_huggingface partner package

- Moved some of the Hugging Face-related classes from `community` to the
new partner package

Still needed:
- Documentation
- Tests
- Support for the new apply_chat_template in `ChatHuggingFace`
- Confirm choice of class to support for embeddings with the
sentence-transformers team.

cc : @efriis

---------

Co-authored-by: Cyril Kondratenko <kkn1993@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-05-13 20:53:15 +00:00
Tomaz Bratanic
9fce03e7db community[patch]: Fix neo4j enhanced schema (#21582) 2024-05-13 15:26:06 -04:00
Christophe Bornet
66a4da8ad0 community[patch]: Improve Cassandra VectorStore docsctrings (#21620) 2024-05-13 15:24:26 -04:00
adreo00
40aff1eacc core[major]: AsyncCallbackManagerForToolRun no longer casts return object to string (#20374)
- **Description:** Stops `AsyncCallbackManagerForToolRun` from
converting the output to str
- **Issue:** #20372
- **Dependencies:** None
2024-05-13 15:09:12 -04:00
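A rough sketch of what this enables (the tool and callback handler below are hypothetical; the point is the `on_tool_end` behavior for async runs):

```python
# Sketch: on_tool_end for async tool runs now receives the tool's original
# output object rather than str(output).
import asyncio
from typing import Any

from langchain_core.callbacks import AsyncCallbackHandler
from langchain_core.tools import tool

class PrintToolOutput(AsyncCallbackHandler):
    async def on_tool_end(self, output: Any, **kwargs: Any) -> None:
        print(type(output), output)  # e.g. <class 'dict'> after this change

@tool
async def lookup_user(user_id: str) -> dict:
    """Return structured user data."""
    return {"id": user_id, "active": True}

asyncio.run(
    lookup_user.ainvoke({"user_id": "42"}, config={"callbacks": [PrintToolOutput()]})
)
```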
Eugene Yurtsev
25fbe356b4 community[patch]: upgrade to recent version of mypy (#21616)
This PR upgrades community to a recent version of mypy. It inserts
`type: ignore` on all existing failures.
2024-05-13 14:55:07 -04:00
Eugene Yurtsev
b923951062 langchain[patch]: CI add lint rule for community imports (#21618)
Add a rule to check for imports from community in global scope
2024-05-13 14:51:25 -04:00
Jorge Piedrahita Ortiz
4378fbbef0 community[patch]: Fix typos in Sambanova integration doc-strings (#21617)
- **Description:** Sambanova integration doc-strings updated; they were
badly formatted.

---------

Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
2024-05-13 18:35:16 +00:00
Erick Friis
0f5bf94f9f infra: remove ai21 docs scan features (#21614)
ai21 depends on ai21-tokenizer, which depends on a too restrictive/old
version of `tokenizers`.
2024-05-13 18:05:53 +00:00
ccurme
fe08421207 docs: add hybrid retrieval how-to guide (#21613)
Updating v0.2 docs with
https://github.com/langchain-ai/langchain/pull/21245
2024-05-13 14:03:55 -04:00
Christophe Bornet
bcf53f93e1 [community]: Add missing docstring param to CassandraLoader (#21611) 2024-05-13 16:03:18 +00:00
Christophe Bornet
e6fa4547b1 community[minor]: Add alazy_load to AsyncHtmlLoader (#21536)
Also fixes a bug where `_scrape` was called and performed a second HTTP
request synchronously.

**Twitter handle:** cbornet_
2024-05-13 12:01:03 -04:00
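A short sketch of the new async iteration path (the URL is illustrative):

```python
# Sketch of alazy_load: yields Documents as pages are fetched, without the
# extra synchronous _scrape request the old code path triggered.
import asyncio

from langchain_community.document_loaders import AsyncHtmlLoader

async def main() -> None:
    loader = AsyncHtmlLoader(["https://example.com"])
    async for doc in loader.alazy_load():
        print(doc.metadata["source"], len(doc.page_content))

asyncio.run(main())
```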
Leonid Ganeline
4c48732f94 docs: providers updates 1 (#20256)
- Provider pages: added missing integrations; fixed formatting
- `mistralai` converted from notebook to .mdx format
2024-05-13 11:54:51 -04:00
ccurme
15cb1133e7 docs: fix path for state_of_the_union sample file (#21609) 2024-05-13 11:46:02 -04:00
Bagatur
83a8fdcfd1 infra: fix local doc make command (#21608) 2024-05-13 08:30:30 -07:00
Eugene Yurtsev
4dc625057e README: Update downloads to show downloads of langchain-core (#21387)
Update downloads to keep track of langchain-core
2024-05-13 11:26:50 -04:00
Wang Guan
b53548dcda langchain[minor]: allow CacheBackedEmbeddings to cache queries (#20073)
Add optional caching of queries to cache backed embeddings
2024-05-13 15:18:04 +00:00
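A sketch of opting in; the `query_embedding_cache` parameter name is an assumption based on the PR description:

```python
# Hypothetical sketch of caching embed_query results alongside documents.
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_openai import OpenAIEmbeddings

underlying = OpenAIEmbeddings()
store = LocalFileStore("./embedding_cache/")

cached = CacheBackedEmbeddings.from_bytes_store(
    underlying,
    store,
    namespace=underlying.model,
    query_embedding_cache=True,  # assumed opt-in flag added by this PR
)

vec = cached.embed_query("What did the president say about the economy?")
```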
Guangdong Liu
a156aace2b core[patch]:Fix Incorrect listeners parameters for Runnable.with_listeners() and .map() (#20661)
- **Issue:** fix #20509
-  @baskaryan, @eyurtsev


![image](https://github.com/langchain-ai/langchain/assets/48236177/f799a976-b983-4d8b-b373-64392e1fd6c6)
2024-05-13 11:16:17 -04:00
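A minimal listener sketch for reference (per the PR description, the fix ensures each bound runnable, including those produced by `.map()`, gets its own listener configuration):

```python
# Minimal sketch of Runnable.with_listeners.
from langchain_core.runnables import RunnableLambda
from langchain_core.tracers.schemas import Run

def on_start(run: Run) -> None:
    print("start:", run.name)

def on_end(run: Run) -> None:
    print("end:", run.name)

chain = RunnableLambda(lambda x: x + 1).with_listeners(on_start=on_start, on_end=on_end)
chain.invoke(1)
```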
ccurme
b0f5a47f25 docs: update some retrievers how-to guides (#21607) 2024-05-13 11:03:33 -04:00
junkeon
480c02bf55 upstage[minor]: add merge_and_split function for document loader (#21603)
- Introduce the `merge_and_split` function in the
`UpstageLayoutAnalysisLoader`.
- The `merge_and_split` function takes a list of documents and a
splitter as inputs.
- This function merges all documents and then divides them using the
splitter's own `split_documents` method.
- If the provided splitter is `None` (which is the default setting), the
function will simply merge the documents without splitting them.
2024-05-13 10:55:19 -04:00
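A rough sketch based on the description above; exact signatures in `langchain_upstage` may differ:

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_upstage import UpstageLayoutAnalysisLoader

loader = UpstageLayoutAnalysisLoader("example.pdf")
docs = loader.load()

# With a splitter: merge all documents, then re-split via split_documents().
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = loader.merge_and_split(docs, splitter)

# With the default splitter=None: documents are merged without splitting.
merged = loader.merge_and_split(docs)
```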
Leonid Ganeline
500569da48 community[patch]: vectorstores import update (#21169)
Issue: we have several helper functions to import third-party libraries
like lancedb.import_lancedb in
[community.vectorstores](https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.lancedb.import_lancedb.html#langchain_community.vectorstores.lancedb.import_lancedb).
We also have core.utils.utils.guard_import, which serves exactly this
purpose.
The import_<package> functions behave inconsistently and should rather be
private functions.
Change: replaced these functions with the guard_import function.

Related to #21133
2024-05-13 10:45:31 -04:00
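For reference, `guard_import` returns the imported module or raises an ImportError that tells the user which pip package to install:

```python
from langchain_core.utils import guard_import

# Module and distribution names match here.
lancedb = guard_import("lancedb")

# pip_name covers the case where the pip package name differs from the module name.
bs4 = guard_import("bs4", pip_name="beautifulsoup4")
```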
ccurme
3003363605 langchain, community: remove cap on sqlalchemy and bump duckdb (#21509) 2024-05-13 10:16:09 -04:00
ccurme
01a3228d8e standard tests: add test for few-shot examples (#21019) 2024-05-13 10:06:12 -04:00
David Duong
db22fcb58b docs: style fixes for api reference docs (#21602)
- Make sure the left nav bar is horizontally scrollable 
- Make sure the navigation dropdown is vertically scrollable and height
capped at 80% of viewport height

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-05-13 06:49:50 -07:00
Chuyuan Qu
af875cff57 prompty: adding Microsoft langchain_prompty package (#21346)
Co-authored-by: Micky Liu <wayliu@microsoft.com>
Co-authored-by: wayliums <wayliums@users.noreply.github.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-05-11 04:03:44 +00:00
Erick Friis
56c6b5868b infra: run codespell on v0.1 prs (#21545) 2024-05-10 12:51:42 -07:00
Matt Florence
d3ca2cc8c3 langchain: Fix broken OpenAIModerationChain and implement async (#18537)
Thank you for contributing to LangChain!

## PR title
langchain[patch]: fix `OpenAIModerationChain` and implement async

## PR message
Description: fix `OpenAIModerationChain` and implement async

Issues: 
- https://github.com/langchain-ai/langchain/issues/18533 
- https://github.com/langchain-ai/langchain/issues/13685

Dependencies: none
Twitter handle: mattflo


## Add tests and docs
 
Existing documentation is broken:
https://python.langchain.com/docs/guides/safety/moderation


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

---------

Co-authored-by: Emilia Katari <emilia@outpace.com>
Co-authored-by: ccurme <chester.curme@gmail.com>
Co-authored-by: Erick Friis <erickfriis@gmail.com>
2024-05-10 19:04:13 +00:00
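A sketch of the now-working async path (assumes the standard chain interface and an OpenAI API key in the environment):

```python
import asyncio

from langchain.chains import OpenAIModerationChain

async def main() -> None:
    moderation = OpenAIModerationChain()
    result = await moderation.ainvoke({"input": "some user-provided text"})
    print(result["output"])

asyncio.run(main())
```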
ccurme
4170e72a42 openai: fix loads unit test (#21542)
following changes to tests in core here:
https://github.com/langchain-ai/langchain/pull/21342/files
2024-05-10 18:46:34 +00:00
ccurme
d3ff9c5d6a infra: turn off fail-fast for standard tests (#21541) 2024-05-10 18:28:57 +00:00
Erick Friis
e8efe8384d docs: announcement bar dark mode 0.2 (#21540) 2024-05-10 10:13:02 -07:00
Erick Friis
64c47224a0 docs: baseUrl for ganalytics, throw on broken links (#21455) 2024-05-10 13:49:59 +00:00
Usama Jamil
913792f5e6 docs: myscale code typo (#21522)
2024-05-10 13:33:22 +00:00
Sevin F. Varoglu
85cbc55f86 docs: update OctoAI LLM doc (#21528)
This PR updates the OctoAI doc to remove warnings when running the example
code.
2024-05-10 09:31:16 -04:00
Daniel Glogowski
70a79f45d7 docs: update nvidia nbs (#21498) 2024-05-10 04:38:35 -04:00
Eugene Yurtsev
39e9b644b9 docs: Add langchain over time (#21434)
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-05-10 00:34:35 +00:00
Erick Friis
3db85cbb5b community: deps (#21508) 2024-05-09 15:12:34 -07:00
ccurme
9c2828aaa8 docs: add local LLMs page to v0.2 docs (#21493)
Adding this page from v0.1 docs:
https://python.langchain.com/v0.1/docs/guides/development/local_llms/
2024-05-09 17:57:56 -04:00
Erick Friis
8580e350be cli: release 0.0.22 (#21507) 2024-05-09 21:45:20 +00:00
Anthony Chu
c735849e76 azure-dynamic-sessions: add Python REPL tool (#21264)
Adds a Python REPL that executes code in a code interpreter session
using Azure Container Apps dynamic sessions.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-05-09 21:39:04 +00:00
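A hypothetical sketch of the new tool (class and parameter names follow the package description; the endpoint is a placeholder):

```python
# Assumes SessionsPythonREPLTool and its pool_management_endpoint parameter.
from langchain_azure_dynamic_sessions import SessionsPythonREPLTool

repl = SessionsPythonREPLTool(
    pool_management_endpoint="https://<region>.dynamicsessions.io/subscriptions/.../sessionPools/<pool>",
)
print(repl.invoke("1 + 1"))
```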
Erick Friis
02701c277f langchain: core min version (#21506) 2024-05-09 13:45:44 -07:00
ccurme
81ae184cc9 docs: add response metadata page to v0.2 docs (#21489)
Adding this page from v0.1 docs:
https://python.langchain.com/v0.1/docs/modules/model_io/chat/response_metadata/
2024-05-09 16:17:04 -04:00
Erick Friis
13b01104c9 langchain: drop sqlalchemy max, release 0.2.0rc2 (#21504) 2024-05-09 13:12:38 -07:00
ccurme
375f447e58 community: fix builds with min dependencies (#21495) 2024-05-09 13:01:44 -07:00
Erick Friis
2be4b1b2c9 Revert "docs: redirect base slug" (#21499)
Reverts langchain-ai/langchain#21457
2024-05-09 12:20:16 -07:00
Erick Friis
d1fc841b1a docs: redirect base slug (#21457) 2024-05-09 10:52:36 -07:00
Trayan Azarov
ba7d53689c community: Chroma Adding create_collection_if_not_exists flag to Chroma constructor (#21420)
- **Description:** Adds the ability to either `get_or_create` or simply
`get_collection`. This is useful when dealing with read-only Chroma
instances where users are constrained to using `get_collection`. Targeted
at Http/CloudClients mostly.
- **Issue:** chroma-core/chroma#2163
- **Dependencies:** N/A
- **Twitter handle:** `@t_azarov`




| Collection Exists | create_collection_if_not_exists | Outcome | test |
|-------------------|---------------------------------|---------|------|
| True | False | No errors, collection state unchanged | `test_create_collection_if_not_exist_false_existing` |
| True | True | No errors, collection state unchanged | `test_create_collection_if_not_exist_true_existing` |
| False | False | Error, `get_collection()` fails | `test_create_collection_if_not_exist_false_non_existing` |
| False | True | No errors, `get_or_create_collection()` creates the collection | `test_create_collection_if_not_exist_true_non_existing` |
2024-05-09 11:45:10 -04:00
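A sketch of the read-only usage this enables (assuming the flag is exposed directly on the `Chroma` constructor, per the description; host and collection names are placeholders):

```python
import chromadb
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

client = chromadb.HttpClient(host="chroma.internal.example", port=8000)

# Fail fast if the collection does not exist, instead of creating it via
# get_or_create_collection(); useful against read-only Chroma servers.
store = Chroma(
    client=client,
    collection_name="my_docs",
    embedding_function=OpenAIEmbeddings(),
    create_collection_if_not_exists=False,
)
```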
ccurme
3bb9bec314 bedrock: add unit test for retriever (#21485)
This was implemented in
https://github.com/langchain-ai/langchain/pull/21349 but dropped before
merge.
2024-05-09 11:37:03 -04:00
Renu Rozera
4035a1d234 Add source metadata to bedrock retriever response (#21349)
Thank you for contributing to LangChain!

- [X] **PR title**: "community: Add source metadata to bedrock retriever
response"

- [X] **PR message**: 
- **Description:** The Bedrock retrieve API returns extra metadata in the
response which is currently not returned in the retriever response.
- **Issue:** The change adds the metadata from the Bedrock retrieve API
response to the Bedrock retriever in a backward-compatible way. Renamed
`metadata` to `sourceMetadata`, since the term `metadata` is already used in
the Document. This is in sync with what we are doing in llama-index
as well.
    - **Dependencies:** No


- [X] **Add tests and docs**:
  1. Added unit tests
  2. Notebook already exists and does not need any change
3. Response from end to end testing, just to ensure backward
compatibility: `[Document(page_content='Exoplanets.',
metadata={'location': {'s3Location': {'uri':
's3://bucket/file_name.txt'}, 'type': 'S3'}, 'score': 0.46886647,
'source_metadata': {'x-amz-bedrock-kb-source-uri':
's3://bucket/file_name.txt', 'tag': 'space', 'team': 'Nasa', 'year':
1946.0}})]`


- [X] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/


---------

Co-authored-by: Piyush Jain <piyushjain@duck.com>
2024-05-09 11:06:22 -04:00
ccurme
9fa17bfabe docs; fix links in v0.2.0 (#21483) 2024-05-09 11:05:17 -04:00
Erick Friis
f178c67ad0 community: release 0.2.0rc1, bump deps (#21470) 2024-05-08 23:32:44 -07:00
William FH
b28be5d407 Pass through Run ID Explicitly (#21469) 2024-05-08 22:20:51 -07:00
Erick Friis
83eecd54fe experimental: 0.2 relax (#21468) 2024-05-08 21:39:42 -07:00
roiperlman
9992beaff9 community: Add arguments to whisper parser (#20378)
**Description:** Added a few additional arguments to the whisper parser,
which can be consumed by the underlying API.
The prompt is especially important to fine-tune transcriptions.

---------

Co-authored-by: Roi Perlman <roi@fivesigmalabs.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-05-08 17:53:13 -07:00
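A hypothetical sketch; the added argument names below follow the underlying OpenAI transcription API (prompt, language, temperature) and may differ from the parser's actual signature:

```python
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers.audio import OpenAIWhisperParser

# The prompt can bias the transcription toward domain-specific vocabulary.
parser = OpenAIWhisperParser(
    language="en",
    prompt="Vocabulary: LangChain, Runnable, LCEL.",
    temperature=0.0,
)
loader = GenericLoader.from_filesystem("./recordings", glob="*.mp3", parser=parser)
docs = loader.load()
```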
Erick Friis
5542eacad8 docs: sidebar autogen hidden support (#21454) 2024-05-09 00:23:52 +00:00
Yash
cb31c3611f Ndb enterprise (#21233)
Description: Adds NeuralDBClientVectorStore to LangChain; it is our
enterprise client.

---------

Co-authored-by: kartikTAI <129414343+kartikTAI@users.noreply.github.com>
Co-authored-by: Kartik Sarangmath <kartik@thirdai.com>
2024-05-08 16:30:58 -07:00
690 changed files with 30334 additions and 8787 deletions

View File

@@ -6,8 +6,8 @@ from typing import Dict
LANGCHAIN_DIRS = [
"libs/core",
"libs/text-splitters",
"libs/community",
"libs/langchain",
"libs/community",
"libs/experimental",
]

View File

@@ -3,9 +3,9 @@ name: CI / cd . / make spell_check
on:
push:
branches: [master]
branches: [master, v0.1]
pull_request:
branches: [master]
branches: [master, v0.1]
permissions:
contents: read

View File

@@ -12,6 +12,7 @@ jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version:
- "3.8"

View File

@@ -17,7 +17,7 @@ clean: docs_clean api_docs_clean
## docs_build: Build the documentation.
docs_build:
cd docs && make build-local
cd docs && make build
## docs_clean: Clean the documentation build artifacts.
docs_clean:

View File

@@ -4,7 +4,7 @@
[![Release Notes](https://img.shields.io/github/release/langchain-ai/langchain)](https://github.com/langchain-ai/langchain/releases)
[![CI](https://github.com/langchain-ai/langchain/actions/workflows/check_diffs.yml/badge.svg)](https://github.com/langchain-ai/langchain/actions/workflows/check_diffs.yml)
[![Downloads](https://static.pepy.tech/badge/langchain/month)](https://pepy.tech/project/langchain)
[![Downloads](https://static.pepy.tech/badge/langchain-core/month)](https://pepy.tech/project/langchain-core)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/langchainai.svg?style=social&label=Follow%20%40LangChainAI)](https://twitter.com/langchainai)
[![](https://dcbadge.vercel.app/api/server/6adMQxSpJS?compact=true&style=flat)](https://discord.gg/6adMQxSpJS)

View File

@@ -647,7 +647,7 @@ Sometimes you may not have the luxury of using OpenAI or other service-hosted la
import logging
import torch
from transformers import AutoTokenizer, GPT2TokenizerFast, pipeline, AutoModelForSeq2SeqLM, AutoModelForCausalLM
from langchain_community.llms import HuggingFacePipeline
from langchain_huggingface import HuggingFacePipeline
# Note: This model requires a large GPU, e.g. an 80GB A100. See documentation for other ways to run private non-OpenAI models.
model_id = "google/flan-ul2"
@@ -992,7 +992,7 @@ Now that you have some examples (with manually corrected output SQL), you can do
```python
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chains.sql_database.prompt import _sqlite_prompt, PROMPT_SUFFIX
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts.example_selector.semantic_similarity import SemanticSimilarityExampleSelector
from langchain_community.vectorstores import Chroma

View File

@@ -13,7 +13,7 @@ OUTPUT_NEW_DOCS_DIR = $(OUTPUT_NEW_DIR)/docs
PYTHON = .venv/bin/python
PARTNER_DEPS_LIST := $(shell ls -1 ../libs/partners | grep -vE "airbyte|ibm" | xargs -I {} echo "../libs/partners/{}" | tr '\n' ' ')
PARTNER_DEPS_LIST := $(shell find ../libs/partners -mindepth 1 -maxdepth 1 -type d -exec test -e "{}/pyproject.toml" \; -print | grep -vE "airbyte|ibm|ai21" | tr '\n' ' ')
PORT ?= 3001
@@ -48,8 +48,6 @@ generate-files:
wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md -O $(INTERMEDIATE_DIR)/langgraph.md
$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langgraph.md https://github.com/langchain-ai/langgraph/tree/main/
$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(INTERMEDIATE_DIR)
copy-infra:
mkdir -p $(OUTPUT_NEW_DIR)
cp -r src $(OUTPUT_NEW_DIR)
@@ -68,7 +66,10 @@ render:
md-sync:
rsync -avm --include="*/" --include="*.mdx" --include="*.md" --include="*.png" --exclude="*" $(INTERMEDIATE_DIR)/ $(OUTPUT_NEW_DOCS_DIR)
build: install-py-deps generate-files copy-infra render md-sync
generate-references:
$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(OUTPUT_NEW_DOCS_DIR)
build: install-py-deps generate-files copy-infra render md-sync generate-references
vercel-build: install-vercel-deps build
rm -rf docs
@@ -78,6 +79,7 @@ vercel-build: install-vercel-deps build
mv build v0.2
mkdir build
mv v0.2 build
mv build/v0.2/404.html build
start:
cd $(OUTPUT_NEW_DIR) && yarn && yarn start --port=$(PORT)

View File

@@ -12,7 +12,8 @@ pre {
}
}
#my-component-root *, #headlessui-portal-root * {
#my-component-root *,
#headlessui-portal-root * {
z-index: 10000;
}

View File

@@ -359,9 +359,14 @@ def main(dirs: Optional[list] = None) -> None:
dirs = [
dir_
for dir_ in os.listdir(ROOT_DIR / "libs")
if dir_ not in ("cli", "partners")
if dir_ not in ("cli", "partners", "standard-tests")
]
dirs += [
dir_
for dir_ in os.listdir(ROOT_DIR / "libs" / "partners")
if os.path.isdir(dir_)
and "pyproject.toml" in os.listdir(ROOT_DIR / "libs" / "partners" / dir_)
]
dirs += os.listdir(ROOT_DIR / "libs" / "partners")
for dir_ in dirs:
# Skip any hidden directories
# Some of these could be present by mistake in the code base

View File

@@ -1398,3 +1398,20 @@ table.sk-sponsor-table td {
.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
/** Custom styles overriding certain values */
div.sk-sidebar-toc-wrapper {
width: unset;
overflow-x: auto;
}
div.sk-sidebar-toc-wrapper > [aria-label="rellinks"] {
position: sticky;
left: 0;
}
.navbar-nav .dropdown-menu {
max-height: 80vh;
overflow-y: auto;
}

View File

@@ -48,7 +48,7 @@
- [by Rabbitmetrics](https://youtu.be/aywZrzNaKjs)
- [by Ivan Reznikov](https://medium.com/@ivanreznikov/langchain-101-course-updated-668f7b41d6cb)
## [Documentation: Use cases](/docs/use_cases)
## [Documentation: Use cases](/docs/how_to#use-cases)
---------------------

View File

@@ -1,27 +1,10 @@
# langchain-core
## 0.1.7 (Jan 5, 2024)
#### Deleted
No deletions.
## 0.1.x
#### Deprecated
- `BaseChatModel` methods `__call__`, `call_as_llm`, `predict`, `predict_messages`. Will be removed in 0.2.0. Use `BaseChatModel.invoke` instead.
- `BaseChatModel` methods `apredict`, `apredict_messages`. Will be removed in 0.2.0. Use `BaseChatModel.ainvoke` instead.
- `BaseLLM` methods `__call__`, `predict`, `predict_messages`. Will be removed in 0.2.0. Use `BaseLLM.invoke` instead.
- `BaseLLM` methods `apredict`, `apredict_messages`. Will be removed in 0.2.0. Use `BaseLLM.ainvoke` instead.
#### Fixed
- Restrict recursive URL scraping: [#15559](https://github.com/langchain-ai/langchain/pull/15559)
#### Added
No additions.
#### Beta
- Marked `langchain_core.load.load` and `langchain_core.load.loads` as beta.
- Marked `langchain_core.beta.runnables.context.ContextGet` and `langchain_core.beta.runnables.context.ContextSet` as beta.
- `BaseLLM` methods `apredict`, `apredict_messages`. Will be removed in 0.2.0. Use `BaseLLM.ainvoke` instead.

View File

@@ -1,16 +1,73 @@
# langchain
## 0.2.0
### Deleted
As of release 0.2.0, `langchain` is required to be integration-agnostic. This means that code in `langchain` should not by default instantiate any specific chat models, llms, embedding models, vectorstores etc; instead, the user will be required to specify those explicitly.
The following functions and classes require an explicit LLM to be passed as an argument:
- `langchain.agents.agent_toolkits.vectorstore.toolkit.VectorStoreToolkit`
- `langchain.agents.agent_toolkits.vectorstore.toolkit.VectorStoreRouterToolkit`
- `langchain.chains.openai_functions.get_openapi_chain`
- `langchain.chains.router.MultiRetrievalQAChain.from_retrievers`
- `langchain.indexes.VectorStoreIndexWrapper.query`
- `langchain.indexes.VectorStoreIndexWrapper.query_with_sources`
- `langchain.indexes.VectorStoreIndexWrapper.aquery_with_sources`
- `langchain.chains.flare.FlareChain`
The following classes now require passing an explicit Embedding model as an argument:
- `langchain.indexes.VectorstoreIndexCreator`
The following code has been removed:
- `langchain.natbot.NatBotChain.from_default` removed in favor of the `from_llm` class method.
### Deprecated
We have two main types of deprecations:
1. Code that was moved from `langchain` into another package (e.g., `langchain-community`)
If you try to import it from `langchain`, the import will keep on working, but will raise a deprecation warning. The warning will provide a replacement import statement.
```python
python -c "from langchain.document_loaders.markdown import UnstructuredMarkdownLoader"
```
```python
LangChainDeprecationWarning: Importing UnstructuredMarkdownLoader from langchain.document_loaders is deprecated. Please replace deprecated imports:
>> from langchain.document_loaders import UnstructuredMarkdownLoader
with new imports of:
>> from langchain_community.document_loaders import UnstructuredMarkdownLoader
```
We will continue supporting the imports in `langchain` until release 0.4 as long as the relevant package where the code lives is installed. (e.g., as long as `langchain_community` is installed.)
However, we advise users not to rely on these imports and instead migrate to the new imports. To help with this process, we're releasing a migration script via the LangChain CLI. See further instructions in the migration guide.
2. Code that has better alternatives available and will eventually be removed, so there's only a single way to do things. (e.g., the `predict_messages` method in ChatModels has been deprecated in favor of `invoke`.)
Many of these were marked for removal in 0.2. We have bumped the removal to 0.3.
## 0.1.0 (Jan 5, 2024)
#### Deleted
### Deleted
No deletions.
#### Deprecated
### Deprecated
Deprecated classes and methods will be removed in 0.2.0
| Deprecated | Alternative | Reason |
| Deprecated | Alternative | Reason |
|---------------------------------|-----------------------------------|------------------------------------------------|
| ChatVectorDBChain | ConversationalRetrievalChain | More general to all retrievers |
| create_ernie_fn_chain | create_ernie_fn_runnable | Use LCEL under the hood |

View File

@@ -7,16 +7,7 @@ This section contains introductions to key parts of LangChain.
## Architecture
LangChain as a framework consists of several pieces. The below diagram shows how they relate.
<ThemedImage
alt="Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers."
sources={{
light: useBaseUrl('/svg/langchain_stack.svg'),
dark: useBaseUrl('/svg/langchain_stack_dark.svg'),
}}
title="LangChain Framework Overview"
/>
LangChain as a framework consists of a number of packages.
### `langchain-core`
This package contains base abstractions of different components and ways to compose them together.
@@ -24,13 +15,6 @@ The interfaces for core components like LLMs, vectorstores, retrievers and more
No third party integrations are defined here.
The dependencies are kept purposefully very lightweight.
### `langchain-community`
This package contains third party integrations that are maintained by the LangChain community.
Key partner packages are separated out (see below).
This contains all integrations for various components (LLMs, vectorstores, retrievers).
All dependencies in this package are optional to keep the package as lightweight as possible.
### Partner packages
While the long tail of integrations are in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).
@@ -42,14 +26,21 @@ The main `langchain` package contains chains, agents, and retrieval strategies t
These are NOT third party integrations.
All chains, agents, and retrieval strategies here are NOT specific to any one integration, but rather generic across all integrations.
### [LangGraph](/docs/langgraph)
### `langchain-community`
Not currently in this repo, `langgraph` is an extension of `langchain` aimed at
This package contains third party integrations that are maintained by the LangChain community.
Key partner packages are separated out (see below).
This contains all integrations for various components (LLMs, vectorstores, retrievers).
All dependencies in this package are optional to keep the package as lightweight as possible.
### [`langgraph`](/docs/langgraph)
`langgraph` is an extension of `langchain` aimed at
building robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph.
LangGraph exposes high level interfaces for creating common types of agents, as well as a low-level API for constructing more contr
### [langserve](/docs/langserve)
### [`langserve`](/docs/langserve)
A package to deploy LangChain chains as REST APIs. Makes it easy to get a production ready API up and running.
@@ -57,28 +48,18 @@ A package to deploy LangChain chains as REST APIs. Makes it easy to get a produc
A developer platform that lets you debug, test, evaluate, and monitor LLM applications.
## Installation
<ThemedImage
alt="Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers."
sources={{
light: useBaseUrl('/svg/langchain_stack.svg'),
dark: useBaseUrl('/svg/langchain_stack_dark.svg'),
}}
title="LangChain Framework Overview"
/>
If you want to work with high level abstractions, you should install the `langchain` package.
## LangChain Expression Language (LCEL)
```shell
pip install langchain
```
If you want to work with specific integrations, you will need to install them separately.
See [here](/docs/integrations/platforms/) for a list of integrations and how to install them.
For working with LangSmith, you will need to set up a LangSmith developer account [here](https://smith.langchain.com) and get an API key.
After that, you can enable it by setting environment variables:
```shell
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=ls__...
```
## LangChain Expression Language
LangChain Expression Language, or LCEL, is a declarative way to easily compose chains together.
LangChain Expression Language, or LCEL, is a declarative way to chain LangChain components.
LCEL was designed from day 1 to **support putting prototypes in production, with no code changes**, from the simplest “prompt + LLM” chain to the most complex chains (we've seen folks successfully run LCEL chains with 100s of steps in production). To highlight a few of the reasons you might want to use LCEL:
**First-class streaming support**
@@ -106,7 +87,7 @@ With LCEL, **all** steps are automatically logged to [LangSmith](/docs/langsmith
[**Seamless LangServe deployment**](/docs/langserve)
Any chain created with LCEL can be easily deployed using [LangServe](/docs/langserve).
### Interface
### Runnable interface
To make it as easy as possible to create custom chains, we've implemented a ["Runnable"](https://api.python.langchain.com/en/stable/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) protocol. Many LangChain components implement the `Runnable` protocol, including chat models, LLMs, output parsers, retrievers, prompt templates, and more. There are also several useful primitives for working with runnables, which you can read about below.
@@ -146,16 +127,6 @@ All runnables expose input and output **schemas** to inspect the inputs and outp
LangChain provides standard, extendable interfaces and external integrations for various components useful for building with LLMs.
Some components LangChain implements, some components we rely on third-party integrations for, and others are a mix.
### LLMs
Language models that take a string as input and return a string.
These are traditionally older models (newer models generally are `ChatModels`, see below).
Although the underlying models are string in, string out, the LangChain wrappers also allow these models to take messages as input.
This makes them interchangeable with ChatModels.
When messages are passed in as input, they will be formatted into a string under the hood before being passed to the underlying model.
LangChain does not provide any LLMs, rather we rely on third party integrations.
### Chat models
Language models that use a sequence of messages as inputs and return chat messages as outputs (as opposed to using plain text).
These are traditionally newer models (older models are generally `LLMs`, see above).
@@ -172,45 +143,17 @@ We have some standardized parameters when constructing ChatModels:
ChatModels also accept other parameters that are specific to that integration.
### Function/Tool Calling
### LLMs
Language models that take a string as input and return a string.
These are traditionally older models (newer models generally are `ChatModels`, see below).
:::info
We use the term tool calling interchangeably with function calling. Although
function calling is sometimes meant to refer to invocations of a single function,
we treat all models as though they can return multiple tool or function calls in
each message.
:::
Although the underlying models are string in, string out, the LangChain wrappers also allow these models to take messages as input.
This makes them interchangeable with ChatModels.
When messages are passed in as input, they will be formatted into a string under the hood before being passed to the underlying model.
Tool calling allows a model to respond to a given prompt by generating output that
matches a user-defined schema. While the name implies that the model is performing
some action, this is actually not the case! The model is coming up with the
arguments to a tool, and actually running the tool (or not) is up to the user -
for example, if you want to [extract output matching some schema](/docs/tutorial/extraction/)
from unstructured text, you could give the model an "extraction" tool that takes
parameters matching the desired schema, then treat the generated output as your final
result.
LangChain does not provide any LLMs, rather we rely on third party integrations.
A tool call includes a name, arguments dict, and an optional identifier. The
arguments dict is structured `{argument_name: argument_value}`.
Many LLM providers, including [Anthropic](https://www.anthropic.com/),
[Cohere](https://cohere.com/), [Google](https://cloud.google.com/vertex-ai),
[Mistral](https://mistral.ai/), [OpenAI](https://openai.com/), and others,
support variants of a tool calling feature. These features typically allow requests
to the LLM to include available tools and their schemas, and for responses to include
calls to these tools. For instance, given a search engine tool, an LLM might handle a
query by first issuing a call to the search engine. The system calling the LLM can
receive the tool call, execute it, and return the output to the LLM to inform its
response. LangChain includes a suite of [built-in tools](/docs/integrations/tools/)
and supports several methods for defining your own [custom tools](/docs/how_to/custom_tools).
There are two main use cases for function/tool calling:
- [How to return structured data from an LLM](/docs/how_to/structured_output/)
- [How to use a model to call tools](/docs/how_to/tool_calling/)
### Message types
### Messages
Some language models take a list of messages as input and return a message.
There are a few different types of messages.
@@ -338,7 +281,7 @@ prompt_template = ChatPromptTemplate.from_messages([
])
```
### Example Selectors
### Example selectors
One common prompting technique for achieving better performance is to include examples as part of the prompt.
This gives the language model concrete examples of how it should behave.
Sometimes these examples are hardcoded into the prompt, but for more advanced situations it may be nice to dynamically select them.
@@ -389,7 +332,7 @@ LangChain has lots of different types of output parsers. This is a list of outpu
| [Datetime](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.datetime.DatetimeOutputParser.html#langchain.output_parsers.datetime.DatetimeOutputParser) | | ✅ | | `str` \| `Message` | `datetime.datetime` | Parses response into a datetime string. |
| [Structured](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.structured.StructuredOutputParser.html#langchain.output_parsers.structured.StructuredOutputParser) | | ✅ | | `str` \| `Message` | `Dict[str, str]` | An output parser that returns structured information. It is less powerful than other output parsers since it only allows for fields to be strings. This can be useful when you are working with smaller LLMs. |
### Chat History
### Chat history
Most LLM applications have a conversational interface.
An essential component of a conversation is being able to refer to information introduced earlier in the conversation.
At bare minimum, a conversational system should be able to access some window of past messages directly.
@@ -398,7 +341,7 @@ The concept of `ChatHistory` refers to a class in LangChain which can be used to
This `ChatHistory` will keep track of inputs and outputs of the underlying chain, and append them as messages to a message database
Future interactions will then load those messages and pass them into the chain as part of the input.
### Document
### Documents
A Document object in LangChain contains information about some data. It has two attributes:
@@ -445,12 +388,12 @@ Embeddings create a vector representation of a piece of text. This is useful bec
The base Embeddings class in LangChain provides two methods: one for embedding documents and one for embedding a query. The former takes as input multiple texts, while the latter takes a single text. The reason for having these as two separate methods is that some embedding providers have different embedding methods for documents (to be searched over) vs queries (the search query itself).
### Vectorstores
### Vector stores
One of the most common ways to store and search over unstructured data is to embed it and store the resulting embedding vectors,
and then at query time to embed the unstructured query and retrieve the embedding vectors that are 'most similar' to the embedded query.
A vector store takes care of storing embedded data and performing vector search for you.
Vectorstores can be converted to the retriever interface by doing:
Vector stores can be converted to the retriever interface by doing:
```python
vectorstore = MyVectorStore()
@@ -465,31 +408,6 @@ Retrievers can be created from vectorstores, but are also broad enough to includ
Retrievers accept a string query as input and return a list of Document's as output.
### Advanced Retrieval Types
LangChain provides several advanced retrieval types. A full list is below, along with the following information:
**Name**: Name of the retrieval algorithm.
**Index Type**: Which index type (if any) this relies on.
**Uses an LLM**: Whether this retrieval method uses an LLM.
**When to Use**: Our commentary on when you should consider using this retrieval method.
**Description**: Description of what this retrieval algorithm is doing.
| Name | Index Type | Uses an LLM | When to Use | Description |
|---------------------------|------------------------------|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [Vectorstore](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStoreRetriever.html#langchain_core.vectorstores.VectorStoreRetriever) | Vectorstore | No | If you are just getting started and looking for something quick and easy. | This is the simplest method and the one that is easiest to get started with. It involves creating embeddings for each piece of text. |
| [ParentDocument](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.parent_document_retriever.ParentDocumentRetriever.html#langchain.retrievers.parent_document_retriever.ParentDocumentRetriever) | Vectorstore + Document Store | No | If your pages have lots of smaller pieces of distinct information that are best indexed by themselves, but best retrieved all together. | This involves indexing multiple chunks for each document. Then you find the chunks that are most similar in embedding space, but you retrieve the whole parent document and return that (rather than individual chunks). |
| [Multi Vector](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.multi_vector.MultiVectorRetriever.html#langchain.retrievers.multi_vector.MultiVectorRetriever) | Vectorstore + Document Store | Sometimes during indexing | If you are able to extract information from documents that you think is more relevant to index than the text itself. | This involves creating multiple vectors for each document. Each vector could be created in a myriad of ways - examples include summaries of the text and hypothetical questions. |
| [Self Query](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.self_query.base.SelfQueryRetriever.html#langchain.retrievers.self_query.base.SelfQueryRetriever) | Vectorstore | Yes | If users are asking questions that are better answered by fetching documents based on metadata rather than similarity with the text. | This uses an LLM to transform user input into two things: (1) a string to look up semantically, (2) a metadata filter to go along with it. This is useful because oftentimes questions are about the METADATA of documents (not the content itself). |
| [Contextual Compression](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.contextual_compression.ContextualCompressionRetriever.html#langchain.retrievers.contextual_compression.ContextualCompressionRetriever) | Any | Sometimes | If you are finding that your retrieved documents contain too much irrelevant information and are distracting the LLM. | This puts a post-processing step on top of another retriever and extracts only the most relevant information from retrieved documents. This can be done with embeddings or an LLM. |
| [Time-Weighted Vectorstore](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.time_weighted_retriever.TimeWeightedVectorStoreRetriever.html#langchain.retrievers.time_weighted_retriever.TimeWeightedVectorStoreRetriever) | Vectorstore | No | If you have timestamps associated with your documents, and you want to retrieve the most recent ones | This fetches documents based on a combination of semantic similarity (as in normal vector retrieval) and recency (looking at timestamps of indexed documents) |
| [Multi-Query Retriever](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.multi_query.MultiQueryRetriever.html#langchain.retrievers.multi_query.MultiQueryRetriever) | Any | Yes | If users are asking questions that are complex and require multiple pieces of distinct information to respond | This uses an LLM to generate multiple queries from the original one. This is useful when the original query needs pieces of information about multiple topics to be properly answered. By generating multiple queries, we can then fetch documents for each of them. |
| [Ensemble](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.ensemble.EnsembleRetriever.html#langchain.retrievers.ensemble.EnsembleRetriever) | Any | No | If you have multiple retrieval methods and want to try combining them. | This fetches documents from multiple retrievers and then combines them. |
### Tools
Tools are interfaces that an agent, chain, or LLM can use to interact with the world.
They combine a few things:
@@ -541,3 +459,94 @@ In order to solve that we built LangGraph to be this flexible, highly-controllab
If you are still using AgentExecutor, do not fear: we still have a guide on [how to use AgentExecutor](/docs/how_to/agent_executor).
It is recommended, however, that you start to transition to LangGraph.
In order to assist in this we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent)
## Techniques
### Function/tool calling
:::info
We use the term tool calling interchangeably with function calling. Although
function calling is sometimes meant to refer to invocations of a single function,
we treat all models as though they can return multiple tool or function calls in
each message.
:::
Tool calling allows a model to respond to a given prompt by generating output that
matches a user-defined schema. While the name implies that the model is performing
some action, this is actually not the case! The model is coming up with the
arguments to a tool, and actually running the tool (or not) is up to the user -
for example, if you want to [extract output matching some schema](/docs/tutorials/extraction)
from unstructured text, you could give the model an "extraction" tool that takes
parameters matching the desired schema, then treat the generated output as your final
result.
A tool call includes a name, arguments dict, and an optional identifier. The
arguments dict is structured `{argument_name: argument_value}`.
Many LLM providers, including [Anthropic](https://www.anthropic.com/),
[Cohere](https://cohere.com/), [Google](https://cloud.google.com/vertex-ai),
[Mistral](https://mistral.ai/), [OpenAI](https://openai.com/), and others,
support variants of a tool calling feature. These features typically allow requests
to the LLM to include available tools and their schemas, and for responses to include
calls to these tools. For instance, given a search engine tool, an LLM might handle a
query by first issuing a call to the search engine. The system calling the LLM can
receive the tool call, execute it, and return the output to the LLM to inform its
response. LangChain includes a suite of [built-in tools](/docs/integrations/tools/)
and supports several methods for defining your own [custom tools](/docs/how_to/custom_tools).
There are two main use cases for function/tool calling:
- [How to return structured data from an LLM](/docs/how_to/structured_output/)
- [How to use a model to call tools](/docs/how_to/tool_calling/)
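For example, a minimal tool-calling sketch with a chat model (the model name is illustrative):

```python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

llm = ChatOpenAI(model="gpt-3.5-turbo-0125").bind_tools([multiply])
msg = llm.invoke("What is 6 times 7?")
print(msg.tool_calls)  # [{'name': 'multiply', 'args': {'a': 6, 'b': 7}, 'id': '...'}]
```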
### Retrieval
LangChain provides several advanced retrieval types. A full list is below, along with the following information:
**Name**: Name of the retrieval algorithm.
**Index Type**: Which index type (if any) this relies on.
**Uses an LLM**: Whether this retrieval method uses an LLM.
**When to Use**: Our commentary on when you should consider using this retrieval method.
**Description**: Description of what this retrieval algorithm is doing.
| Name | Index Type | Uses an LLM | When to Use | Description |
|---------------------------|------------------------------|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [Vectorstore](/docs/how_to/vectorstore_retriever/) | Vectorstore | No | If you are just getting started and looking for something quick and easy. | This is the simplest method and the one that is easiest to get started with. It involves creating embeddings for each piece of text. |
| [ParentDocument](/docs/how_to/parent_document_retriever/) | Vectorstore + Document Store | No | If your pages have lots of smaller pieces of distinct information that are best indexed by themselves, but best retrieved all together. | This involves indexing multiple chunks for each document. Then you find the chunks that are most similar in embedding space, but you retrieve the whole parent document and return that (rather than individual chunks). |
| [Multi Vector](/docs/how_to/multi_vector/) | Vectorstore + Document Store | Sometimes during indexing | If you are able to extract information from documents that you think is more relevant to index than the text itself. | This involves creating multiple vectors for each document. Each vector could be created in a myriad of ways - examples include summaries of the text and hypothetical questions. |
| [Self Query](/docs/how_to/self_query/) | Vectorstore | Yes | If users are asking questions that are better answered by fetching documents based on metadata rather than similarity with the text. | This uses an LLM to transform user input into two things: (1) a string to look up semantically, (2) a metadata filter to go along with it. This is useful because oftentimes questions are about the METADATA of documents (not the content itself). |
| [Contextual Compression](/docs/how_to/contextual_compression/) | Any | Sometimes | If you are finding that your retrieved documents contain too much irrelevant information and are distracting the LLM. | This puts a post-processing step on top of another retriever and extracts only the most relevant information from retrieved documents. This can be done with embeddings or an LLM. |
| [Time-Weighted Vectorstore](/docs/how_to/time_weighted_vectorstore/) | Vectorstore | No | If you have timestamps associated with your documents, and you want to retrieve the most recent ones | This fetches documents based on a combination of semantic similarity (as in normal vector retrieval) and recency (looking at timestamps of indexed documents) |
| [Multi-Query Retriever](/docs/how_to/MultiQueryRetriever/) | Any | Yes | If users are asking questions that are complex and require multiple pieces of distinct information to respond | This uses an LLM to generate multiple queries from the original one. This is useful when the original query needs pieces of information about multiple topics to be properly answered. By generating multiple queries, we can then fetch documents for each of them. |
| [Ensemble](/docs/how_to/ensemble_retriever/) | Any | No | If you have multiple retrieval methods and want to try combining them. | This fetches documents from multiple retrievers and then combines them. |
### Text splitting
LangChain offers many different types of `text splitters`.
These all live in the `langchain-text-splitters` package.
Table columns:
- **Name**: Name of the text splitter
- **Classes**: Classes that implement this text splitter
- **Splits On**: How this text splitter splits text
- **Adds Metadata**: Whether or not this text splitter adds metadata about where each chunk came from.
- **Description**: Description of the splitter, including recommendation on when to use it.
| Name | Classes | Splits On | Adds Metadata | Description |
|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Recursive | [RecursiveCharacterTextSplitter](/docs/how_to/recursive_text_splitter/), [RecursiveJsonSplitter](/docs/how_to/recursive_json_splitter/) | A list of user defined characters | | Recursively splits text. This splitting is trying to keep related pieces of text next to each other. This is the `recommended way` to start splitting text. |
| HTML | [HTMLHeaderTextSplitter](/docs/how_to/HTML_header_metadata_splitter/), [HTMLSectionSplitter](/docs/how_to/HTML_section_aware_splitter/) | HTML specific characters | ✅ | Splits text based on HTML-specific characters. Notably, this adds in relevant information about where that chunk came from (based on the HTML) |
| Markdown | [MarkdownHeaderTextSplitter](/docs/how_to/markdown_header_metadata_splitter/) | Markdown specific characters | ✅ | Splits text based on Markdown-specific characters. Notably, this adds in relevant information about where that chunk came from (based on the Markdown) |
| Code | [many languages](/docs/how_to/code_splitter/) | Code (Python, JS) specific characters | | Splits text based on characters specific to coding languages. 15 different languages are available to choose from. |
| Token | [many classes](/docs/how_to/split_by_token/) | Tokens | | Splits text on tokens. There are a few different ways to measure tokens. |
| Character | [CharacterTextSplitter](/docs/how_to/character_text_splitter/) | A user defined character | | Splits text based on a user defined character. One of the simpler methods. |
| Semantic Chunker (Experimental) | [SemanticChunker](/docs/how_to/semantic-chunker/) | Sentences | | First splits on sentences. Then combines adjacent sentences if they are semantically similar enough. Taken from [Greg Kamradt](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/tutorials/LevelsOfTextSplitting/5_Levels_Of_Text_Splitting.ipynb) |
| Integration: AI21 Semantic | [AI21SemanticTextSplitter](/docs/integrations/document_transformers/ai21_semantic_text_splitter/) | Distinct topics | ✅ | Identifies distinct topics that form coherent pieces of text and splits along those. |
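
As a quick, hedged illustration of the recommended starting point, the sketch below splits a short string with `RecursiveCharacterTextSplitter`; the sample text and the `chunk_size` and `chunk_overlap` values are arbitrary examples.

```python
# A minimal sketch of the recommended recursive splitter; chunk_size and
# chunk_overlap below are arbitrary example values.
from langchain_text_splitters import RecursiveCharacterTextSplitter

text = (
    "LangChain text splitters break long documents into smaller chunks, "
    "trying to keep related pieces of text (paragraphs, sentences, words) "
    "next to each other."
)

splitter = RecursiveCharacterTextSplitter(chunk_size=60, chunk_overlap=10)
chunks = splitter.split_text(text)

# Each chunk is a plain string no longer than chunk_size characters
for chunk in chunks:
    print(chunk)
```
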

View File

@@ -16,15 +16,15 @@ LangChain's documentation aspires to follow the [Diataxis framework](https://dia
Under this framework, all documentation falls under one of four categories:
- **Tutorials**: Lessons that take the reader by the hand through a series of conceptual steps to complete a project.
- An example of this is our [LCEL streaming guide](/docs/expression_language/streaming).
- Our guide on [custom components](/docs/modules/model_io/chat/custom_chat_model) is another one.
- An example of this is our [LCEL streaming guide](/docs/how_to/streaming).
- Our guide on [custom components](/docs/how_to/custom_chat_model) is another one.
- **How-to guides**: Guides that take the reader through the steps required to solve a real-world problem.
- The clearest examples of this are our [Use case](/docs/use_cases/) quickstart pages.
- The clearest examples of this are our [Use case](/docs/how_to#use-cases) quickstart pages.
- **Reference**: Technical descriptions of the machinery and how to operate it.
- Our [Runnable interface](/docs/expression_language/interface) page is an example of this.
- Our [Runnable interface](/docs/concepts#interface) page is an example of this.
- The [API reference pages](https://api.python.langchain.com/) are another.
- **Explanation**: Explanations that clarify and illuminate a particular topic.
- The [LCEL primitives pages](/docs/expression_language/primitives/sequence) are an example of this.
- The [LCEL primitives pages](/docs/how_to/sequence) are an example of this.
Each category serves a distinct purpose and requires a specific approach to writing and structuring the content.
@@ -35,14 +35,14 @@ when contributing new documentation:
### Getting started
The [getting started section](/docs/get_started/introduction) includes a high-level introduction to LangChain, a quickstart that
The [getting started section](/docs/introduction) includes a high-level introduction to LangChain, a quickstart that
tours LangChain's various features, and logistical instructions around installation and project setup.
It contains elements of **How-to guides** and **Explanations**.
### Use cases
[Use cases](/docs/use_cases/) are guides that are meant to show how to use LangChain to accomplish a specific task (RAG, information extraction, etc.).
[Use cases](/docs/how_to#use-cases) are guides that are meant to show how to use LangChain to accomplish a specific task (RAG, information extraction, etc.).
The quickstarts should be good entrypoints for first-time LangChain developers who prefer to learn by getting something practical prototyped,
then taking the pieces apart retrospectively. These should mirror what LangChain is good at.
@@ -55,7 +55,7 @@ The below sections are listed roughly in order of increasing level of abstractio
### Expression Language
[LangChain Expression Language (LCEL)](/docs/expression_language/) is the fundamental way that most LangChain components fit together, and this section is designed to teach
[LangChain Expression Language (LCEL)](/docs/concepts#langchain-expression-language) is the fundamental way that most LangChain components fit together, and this section is designed to teach
developers how to use it to build with LangChain's primitives effectively.
This section should contain **Tutorials** that teach how to stream and use LCEL primitives for more abstract tasks, **Explanations** of specific behaviors,
@@ -63,7 +63,7 @@ and some **References** for how to use different methods in the Runnable interfa
### Components
The [components section](/docs/modules) covers concepts one level of abstraction higher than LCEL.
The [components section](/docs/concepts) covers concepts one level of abstraction higher than LCEL.
Abstract base classes like `BaseChatModel` and `BaseRetriever` should be covered here, as well as core implementations of these base classes,
such as `ChatPromptTemplate` and `RecursiveCharacterTextSplitter`. Customization guides belong here too.
@@ -88,7 +88,7 @@ Concepts covered in `Integrations` should generally exist in `langchain_communit
### Guides and Ecosystem
The [Guides](/docs/guides) and [Ecosystem](/docs/langsmith/) sections should contain guides that address higher-level problems than the sections above.
The [Guides](/docs/tutorials) and [Ecosystem](/docs/langsmith/) sections should contain guides that address higher-level problems than the sections above.
This includes, but is not limited to, considerations around productionization and development workflows.
These should contain mostly **How-to guides**, **Explanations**, and **Tutorials**.
@@ -102,7 +102,7 @@ LangChain's API references. Should act as **References** (as the name implies) w
We have set up our docs to assist a new developer to LangChain. Let's walk through the intended path:
- The developer lands on https://python.langchain.com, and reads through the introduction and the diagram.
- If they are just curious, they may be drawn to the [Quickstart](/docs/get_started/quickstart) to get a high-level tour of what LangChain contains.
- If they are just curious, they may be drawn to the [Quickstart](/docs/tutorials/llm_chain) to get a high-level tour of what LangChain contains.
- If they have a specific task in mind that they want to accomplish, they will be drawn to the Use-Case section. The use-case should provide a good, concrete hook that shows the value LangChain can provide them and be a good entrypoint to the framework.
- They can then move to learn more about the fundamentals of LangChain through the Expression Language sections.
- Next, they can learn about LangChain's various components and integrations.

View File

@@ -7,14 +7,16 @@
"source": [
"# How to use the MultiQueryRetriever\n",
"\n",
"Distance-based vector database retrieval embeds (represents) queries in high-dimensional space and finds similar embedded documents based on \"distance\". But, retrieval may produce different results with subtle changes in query wording or if the embeddings do not capture the semantics of the data well. Prompt engineering / tuning is sometimes done to manually address these problems, but can be tedious.\n",
"Distance-based vector database retrieval embeds (represents) queries in high-dimensional space and finds similar embedded documents based on a distance metric. But, retrieval may produce different results with subtle changes in query wording, or if the embeddings do not capture the semantics of the data well. Prompt engineering / tuning is sometimes done to manually address these problems, but can be tedious.\n",
"\n",
"The `MultiQueryRetriever` automates the process of prompt tuning by using an LLM to generate multiple queries from different perspectives for a given user input query. For each query, it retrieves a set of relevant documents and takes the unique union across all queries to get a larger set of potentially relevant documents. By generating multiple perspectives on the same question, the `MultiQueryRetriever` might be able to overcome some of the limitations of the distance-based retrieval and get a richer set of results."
"The [MultiQueryRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.multi_query.MultiQueryRetriever.html) automates the process of prompt tuning by using an LLM to generate multiple queries from different perspectives for a given user input query. For each query, it retrieves a set of relevant documents and takes the unique union across all queries to get a larger set of potentially relevant documents. By generating multiple perspectives on the same question, the `MultiQueryRetriever` can mitigate some of the limitations of the distance-based retrieval and get a richer set of results.\n",
"\n",
"Let's build a vectorstore using the [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng from the [RAG tutorial](/docs/tutorials/rag):"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "994d6c74",
"metadata": {},
"outputs": [],
@@ -50,7 +52,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "edbca101",
"metadata": {},
"outputs": [],
@@ -67,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "9e6d3b69",
"metadata": {},
"outputs": [],
@@ -81,15 +83,15 @@
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e5203612",
"execution_count": 4,
"id": "bc93dc2b-9407-48b0-9f9a-338247e7eb69",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be approached?', '2. What are the different methods for Task Decomposition?', '3. What are the various approaches to decomposing tasks?']\n"
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be achieved through different methods?', '2. What strategies are commonly used for Task Decomposition?', '3. What are the various techniques for breaking down tasks in Task Decomposition?']\n"
]
},
{
@@ -98,16 +100,24 @@
"5"
]
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"unique_docs = retriever_from_llm.get_relevant_documents(query=question)\n",
"unique_docs = retriever_from_llm.invoke(question)\n",
"len(unique_docs)"
]
},
{
"cell_type": "markdown",
"id": "7e170263-facd-4065-bb68-d11fb9123a45",
"metadata": {},
"source": [
"Note that the underlying queries generated by the retriever are logged at the `INFO` level."
]
},
{
"cell_type": "markdown",
"id": "c54a282f",
@@ -115,37 +125,45 @@
"source": [
"#### Supplying your own prompt\n",
"\n",
"You can also supply a prompt along with an output parser to split the results into a list of queries."
"Under the hood, `MultiQueryRetriever` generates queries using a specific [prompt](https://api.python.langchain.com/en/latest/_modules/langchain/retrievers/multi_query.html#MultiQueryRetriever). To customize this prompt:\n",
"\n",
"1. Make a [PromptTemplate](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.prompt.PromptTemplate.html) with an input variable for the question;\n",
"2. Implement an [output parser](/docs/concepts#output-parsers) like the one below to split the result into a list of queries.\n",
"\n",
"The prompt and output parser together must support the generation of a list of queries."
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "d9afb0ca",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/chestercurme/.pyenv/versions/3.10.4/envs/sandbox310/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 0.3.0. Use RunnableSequence, e.g., `prompt | llm` instead.\n",
" warn_deprecated(\n"
]
}
],
"source": [
"from typing import List\n",
"\n",
"from langchain.chains import LLMChain\n",
"from langchain.output_parsers import PydanticOutputParser\n",
"from langchain_core.output_parsers import BaseOutputParser\n",
"from langchain_core.prompts import PromptTemplate\n",
"from pydantic import BaseModel, Field\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"\n",
"\n",
"# Output parser will split the LLM result into a list of queries\n",
"class LineList(BaseModel):\n",
" # \"lines\" is the key (attribute name) of the parsed output\n",
" lines: List[str] = Field(description=\"Lines of text\")\n",
"class LineListOutputParser(BaseOutputParser[List[str]]):\n",
" \"\"\"Output parser for a list of lines.\"\"\"\n",
"\n",
"\n",
"class LineListOutputParser(PydanticOutputParser):\n",
" def __init__(self) -> None:\n",
" super().__init__(pydantic_object=LineList)\n",
"\n",
" def parse(self, text: str) -> LineList:\n",
" def parse(self, text: str) -> List[str]:\n",
" lines = text.strip().split(\"\\n\")\n",
" return LineList(lines=lines)\n",
" return lines\n",
"\n",
"\n",
"output_parser = LineListOutputParser()\n",
@@ -170,24 +188,24 @@
},
{
"cell_type": "code",
"execution_count": 7,
"id": "6660d7ee",
"execution_count": 6,
"id": "2eca2d96-8057-4ed9-873d-fa1064c09acf",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:langchain.retrievers.multi_query:Generated queries: [\"1. What is the course's perspective on regression?\", '2. Can you provide information on regression as discussed in the course?', '3. How does the course cover the topic of regression?', \"4. What are the course's teachings on regression?\", '5. In relation to the course, what is mentioned about regression?']\n"
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you provide insights on regression from the course material?', '2. How is regression discussed in the course content?', '3. What information does the course offer about regression analysis?', '4. What are the teachings of the course regarding regression?', '5. In what manner is regression covered in the course curriculum?']\n"
]
},
{
"data": {
"text/plain": [
"11"
"9"
]
},
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -199,9 +217,7 @@
") # \"lines\" is the key (attribute name) of the parsed output\n",
"\n",
"# Results\n",
"unique_docs = retriever.get_relevant_documents(\n",
" query=\"What does the course say about regression?\"\n",
")\n",
"unique_docs = retriever.invoke(\"What does the course say about regression?\")\n",
"len(unique_docs)"
]
}
@@ -222,7 +238,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.10.4"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,446 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "9d59582a-6473-4b34-929b-3e94cb443c3d",
"metadata": {},
"source": [
"# How to add scores to retriever results\n",
"\n",
"Retrievers will return sequences of [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects, which by default include no information about the process that retrieved them (e.g., a similarity score against a query). Here we demonstrate how to add retrieval scores to the `.metadata` of documents:\n",
"1. From [vectorstore retrievers](/docs/how_to/vectorstore_retriever);\n",
"2. From higher-order LangChain retrievers, such as [SelfQueryRetriever](/docs/how_to/self_query) or [MultiVectorRetriever](/docs/how_to/multi_vector).\n",
"\n",
"For (1), we will implement a short wrapper function around the corresponding vector store. For (2), we will update a method of the corresponding class.\n",
"\n",
"## Create vector store\n",
"\n",
"First we populate a vector store with some data. We will use a [PineconeVectorStore](https://api.python.langchain.com/en/latest/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html), but this guide is compatible with any LangChain vector store that implements a `.similarity_search_with_score` method."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b8cfcb1b-64ee-4b91-8d82-ce7803834985",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.documents import Document\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_pinecone import PineconeVectorStore\n",
"\n",
"docs = [\n",
" Document(\n",
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
" metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
" ),\n",
" Document(\n",
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
" ),\n",
" Document(\n",
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
" ),\n",
" Document(\n",
" page_content=\"Toys come alive and have a blast doing so\",\n",
" metadata={\"year\": 1995, \"genre\": \"animated\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
" metadata={\n",
" \"year\": 1979,\n",
" \"director\": \"Andrei Tarkovsky\",\n",
" \"genre\": \"thriller\",\n",
" \"rating\": 9.9,\n",
" },\n",
" ),\n",
"]\n",
"\n",
"vectorstore = PineconeVectorStore.from_documents(\n",
" docs, index_name=\"sample\", embedding=OpenAIEmbeddings()\n",
")"
]
},
{
"cell_type": "markdown",
"id": "22ac5ef6-ce18-427f-a91c-62b38a8b41e9",
"metadata": {},
"source": [
"## Retriever\n",
"\n",
"To obtain scores from a vector store retriever, we wrap the underlying vector store's `.similarity_search_with_score` method in a short function that packages scores into the associated document's metadata.\n",
"\n",
"We add a `@chain` decorator to the function to create a [Runnable](/docs/concepts/#langchain-expression-language) that can be used similarly to a typical retriever."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7e5677c3-f6ee-4974-ab5f-a0f50c199d45",
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"\n",
"from langchain_core.documents import Document\n",
"from langchain_core.runnables import chain\n",
"\n",
"\n",
"@chain\n",
"def retriever(query: str) -> List[Document]:\n",
" docs, scores = zip(*vectorstore.similarity_search_with_score(query))\n",
" for doc, score in zip(docs, scores):\n",
" doc.metadata[\"score\"] = score\n",
"\n",
" return docs"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c9cad75e-b955-4012-989c-3c1820b49ba9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993.0, 'score': 0.84429127}),\n",
" Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'year': 1995.0, 'score': 0.792038262}),\n",
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979.0, 'score': 0.751571238}),\n",
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006.0, 'score': 0.747471571}))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result = retriever.invoke(\"dinosaur\")\n",
"result"
]
},
{
"cell_type": "markdown",
"id": "6671308a-be8d-4c15-ae1f-5bd07b342560",
"metadata": {},
"source": [
"Note that similarity scores from the retrieval step are included in the metadata of the above documents."
]
},
{
"cell_type": "markdown",
"id": "af2e73a0-46a1-47e2-8103-68aaa637642a",
"metadata": {},
"source": [
"## SelfQueryRetriever\n",
"\n",
"`SelfQueryRetriever` will use a LLM to generate a query that is potentially structured-- for example, it can construct filters for the retrieval on top of the usual semantic-similarity driven selection. See [this guide](/docs/how_to/self_query) for more detail.\n",
"\n",
"`SelfQueryRetriever` includes a short (1 - 2 line) method `_get_docs_with_query` that executes the `vectorstore` search. We can subclass `SelfQueryRetriever` and override this method to propagate similarity scores.\n",
"\n",
"First, following the [how-to guide](/docs/how_to/self_query), we will need to establish some metadata on which to filter:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8280b829-2e81-4454-8adc-9a0930047fa2",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"metadata_field_info = [\n",
" AttributeInfo(\n",
" name=\"genre\",\n",
" description=\"The genre of the movie. One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']\",\n",
" type=\"string\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"year\",\n",
" description=\"The year the movie was released\",\n",
" type=\"integer\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"director\",\n",
" description=\"The name of the movie director\",\n",
" type=\"string\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
" ),\n",
"]\n",
"document_content_description = \"Brief summary of a movie\"\n",
"llm = ChatOpenAI(temperature=0)"
]
},
{
"cell_type": "markdown",
"id": "0a6c6fa8-1e2f-45ee-83e9-a6cbd82292d2",
"metadata": {},
"source": [
"We then override the `_get_docs_with_query` to use the `similarity_search_with_score` method of the underlying vector store: "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "62c8f3fa-8b64-4afb-87c4-ccbbf9a8bc54",
"metadata": {},
"outputs": [],
"source": [
"from typing import Any, Dict\n",
"\n",
"\n",
"class CustomSelfQueryRetriever(SelfQueryRetriever):\n",
" def _get_docs_with_query(\n",
" self, query: str, search_kwargs: Dict[str, Any]\n",
" ) -> List[Document]:\n",
" \"\"\"Get docs, adding score information.\"\"\"\n",
" docs, scores = zip(\n",
" *vectorstore.similarity_search_with_score(query, **search_kwargs)\n",
" )\n",
" for doc, score in zip(docs, scores):\n",
" doc.metadata[\"score\"] = score\n",
"\n",
" return docs"
]
},
{
"cell_type": "markdown",
"id": "56e40109-1db6-44c7-a6e6-6989175e267c",
"metadata": {},
"source": [
"Invoking this retriever will now include similarity scores in the document metadata. Note that the underlying structured-query capabilities of `SelfQueryRetriever` are retained."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3359a1ee-34ff-41b6-bded-64c05785b333",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993.0, 'score': 0.84429127}),)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever = CustomSelfQueryRetriever.from_llm(\n",
" llm,\n",
" vectorstore,\n",
" document_content_description,\n",
" metadata_field_info,\n",
")\n",
"\n",
"\n",
"result = retriever.invoke(\"dinosaur movie with rating less than 8\")\n",
"result"
]
},
{
"cell_type": "markdown",
"id": "689ab3ba-3494-448b-836e-05fbe1ffd51c",
"metadata": {},
"source": [
"## MultiVectorRetriever\n",
"\n",
"`MultiVectorRetriever` allows you to associate multiple vectors with a single document. This can be useful in a number of applications. For example, we can index small chunks of a larger document and run the retrieval on the chunks, but return the larger \"parent\" document when invoking the retriever. [ParentDocumentRetriever](/docs/how_to/parent_document_retriever/), a subclass of `MultiVectorRetriever`, includes convenience methods for populating a vector store to support this. Further applications are detailed in this [how-to guide](/docs/how_to/multi_vector/).\n",
"\n",
"To propagate similarity scores through this retriever, we can again subclass `MultiVectorRetriever` and override a method. This time we will override `_get_relevant_documents`.\n",
"\n",
"First, we prepare some fake data. We generate fake \"whole documents\" and store them in a document store; here we will use a simple [InMemoryStore](https://api.python.langchain.com/en/latest/stores/langchain_core.stores.InMemoryBaseStore.html)."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a112e545-7b53-4fcd-9c4a-7a42a5cc646d",
"metadata": {},
"outputs": [],
"source": [
"from langchain.storage import InMemoryStore\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"# The storage layer for the parent documents\n",
"docstore = InMemoryStore()\n",
"fake_whole_documents = [\n",
" (\"fake_id_1\", Document(page_content=\"fake whole document 1\")),\n",
" (\"fake_id_2\", Document(page_content=\"fake whole document 2\")),\n",
"]\n",
"docstore.mset(fake_whole_documents)"
]
},
{
"cell_type": "markdown",
"id": "453b7415-4a6d-45d4-a329-9c1d7271d1b2",
"metadata": {},
"source": [
"Next we will add some fake \"sub-documents\" to our vector store. We can link these sub-documents to the parent documents by populating the `\"doc_id\"` key in its metadata."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "314519c0-dde4-41ea-a1ab-d3cf1c17c63f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['62a85353-41ff-4346-bff7-be6c8ec2ed89',\n",
" '5d4a0e83-4cc5-40f1-bc73-ed9cbad0ee15',\n",
" '8c1d9a56-120f-45e4-ba70-a19cd19a38f4']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = [\n",
" Document(\n",
" page_content=\"A snippet from a larger document discussing cats.\",\n",
" metadata={\"doc_id\": \"fake_id_1\"},\n",
" ),\n",
" Document(\n",
" page_content=\"A snippet from a larger document discussing discourse.\",\n",
" metadata={\"doc_id\": \"fake_id_1\"},\n",
" ),\n",
" Document(\n",
" page_content=\"A snippet from a larger document discussing chocolate.\",\n",
" metadata={\"doc_id\": \"fake_id_2\"},\n",
" ),\n",
"]\n",
"\n",
"vectorstore.add_documents(docs)"
]
},
{
"cell_type": "markdown",
"id": "e391f7f3-5a58-40fd-89fa-a0815c5146f7",
"metadata": {},
"source": [
"To propagate the scores, we subclass `MultiVectorRetriever` and override its `_get_relevant_documents` method. Here we will make two changes:\n",
"\n",
"1. We will add similarity scores to the metadata of the corresponding \"sub-documents\" using the `similarity_search_with_score` method of the underlying vector store as above;\n",
"2. We will include a list of these sub-documents in the metadata of the retrieved parent document. This surfaces what snippets of text were identified by the retrieval, together with their corresponding similarity scores."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1de61de7-1b58-41d6-9dea-939fef7d741d",
"metadata": {},
"outputs": [],
"source": [
"from collections import defaultdict\n",
"\n",
"from langchain.retrievers import MultiVectorRetriever\n",
"from langchain_core.callbacks import CallbackManagerForRetrieverRun\n",
"\n",
"\n",
"class CustomMultiVectorRetriever(MultiVectorRetriever):\n",
" def _get_relevant_documents(\n",
" self, query: str, *, run_manager: CallbackManagerForRetrieverRun\n",
" ) -> List[Document]:\n",
" \"\"\"Get documents relevant to a query.\n",
" Args:\n",
" query: String to find relevant documents for\n",
" run_manager: The callbacks handler to use\n",
" Returns:\n",
" List of relevant documents\n",
" \"\"\"\n",
" results = self.vectorstore.similarity_search_with_score(\n",
" query, **self.search_kwargs\n",
" )\n",
"\n",
" # Map doc_ids to list of sub-documents, adding scores to metadata\n",
" id_to_doc = defaultdict(list)\n",
" for doc, score in results:\n",
" doc_id = doc.metadata.get(\"doc_id\")\n",
" if doc_id:\n",
" doc.metadata[\"score\"] = score\n",
" id_to_doc[doc_id].append(doc)\n",
"\n",
" # Fetch documents corresponding to doc_ids, retaining sub_docs in metadata\n",
" docs = []\n",
" for _id, sub_docs in id_to_doc.items():\n",
" docstore_docs = self.docstore.mget([_id])\n",
" if docstore_docs:\n",
" if doc := docstore_docs[0]:\n",
" doc.metadata[\"sub_docs\"] = sub_docs\n",
" docs.append(doc)\n",
"\n",
" return docs"
]
},
{
"cell_type": "markdown",
"id": "7af27b38-631c-463f-9d66-bcc985f06a4f",
"metadata": {},
"source": [
"Invoking this retriever, we can see that it identifies the correct parent document, including the relevant snippet from the sub-document with similarity score."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "dc42a1be-22e1-4ade-b1bd-bafb85f2424f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='fake whole document 1', metadata={'sub_docs': [Document(page_content='A snippet from a larger document discussing cats.', metadata={'doc_id': 'fake_id_1', 'score': 0.831276655})]})]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever = CustomMultiVectorRetriever(vectorstore=vectorstore, docstore=docstore)\n",
"\n",
"retriever.invoke(\"cat\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -811,9 +811,10 @@
"\n",
"If you want to continue using LangChain agents, some good advanced guides are:\n",
"\n",
"- [How to create a custom agent](/docs/how_to/custom_agent)\n",
"- [How to stream responses from an agent](/docs/how_to/agents_streaming)\n",
"- [How to return structured output from an agent](/docs/how_to/agent_structured)"
"- [How to use LangGraph's built-in versions of `AgentExecutor`](/docs/how_to/migrate_agent)\n",
"- [How to create a custom agent](https://python.langchain.com/v0.1/docs/modules/agents/how_to/custom_agent/)\n",
"- [How to stream responses from an agent](https://python.langchain.com/v0.1/docs/modules/agents/how_to/streaming/)\n",
"- [How to return structured output from an agent](https://python.langchain.com/v0.1/docs/modules/agents/how_to/agent_structured/)"
]
},
{

View File

@@ -16,21 +16,20 @@
"source": [
"# How to add values to a chain's state\n",
"\n",
"An alternate way of [passing data through](/docs/how_to/passthrough) steps of a chain is to leave the current values of the chain state unchanged while assigning a new value under a given key. The [`RunnablePassthrough.assign()`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html#langchain_core.runnables.passthrough.RunnablePassthrough.assign) static method takes an input value and adds the extra arguments passed to the assign function.\n",
":::info Prerequisites\n",
"\n",
"This is useful in the common [LangChain Expression Language](/docs/concepts/#langchain-expression-language) pattern of additively creating a dictionary to use as input to a later step.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"- [Calling runnables in parallel](/docs/how_to/parallel/)\n",
"- [Custom functions](/docs/how_to/functions/)\n",
"- [Passing data through](/docs/how_to/passthrough)\n",
"`} />\n",
"```\n",
"\n",
":::\n",
"\n",
"An alternate way of [passing data through](/docs/how_to/passthrough) steps of a chain is to leave the current values of the chain state unchanged while assigning a new value under a given key. The [`RunnablePassthrough.assign()`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html#langchain_core.runnables.passthrough.RunnablePassthrough.assign) static method takes an input value and adds the extra arguments passed to the assign function.\n",
"\n",
"This is useful in the common [LangChain Expression Language](/docs/concepts/#langchain-expression-language) pattern of additively creating a dictionary to use as input to a later step.\n",
"\n",
"Here's an example:"
]
@@ -184,9 +183,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

View File

@@ -18,17 +18,16 @@
"source": [
"# How to attach runtime arguments to a Runnable\n",
"\n",
"Sometimes we want to invoke a [`Runnable`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html) within a [RunnableSequence](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableSequence.html) with constant arguments that are not part of the output of the preceding Runnable in the sequence, and which are not part of the user input. We can use the [`Runnable.bind()`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind) method to set these arguments ahead of time.\n",
":::info Prerequisites\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"- [Tool calling](/docs/how_to/tool_calling/)\n",
"`} />\n",
"```\n",
"\n",
":::\n",
"\n",
"Sometimes we want to invoke a [`Runnable`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html) within a [RunnableSequence](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableSequence.html) with constant arguments that are not part of the output of the preceding Runnable in the sequence, and which are not part of the user input. We can use the [`Runnable.bind()`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind) method to set these arguments ahead of time.\n",
"\n",
"## Binding stop sequences\n",
"\n",
@@ -228,7 +227,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -18,11 +18,12 @@
"- document_embedding_cache: Any [`ByteStore`](/docs/integrations/stores/) for caching document embeddings.\n",
"- batch_size: (optional, defaults to `None`) The number of documents to embed between store updates.\n",
"- namespace: (optional, defaults to `\"\"`) The namespace to use for document cache. This namespace is used to avoid collisions with other caches. For example, set it to the name of the embedding model used.\n",
"- query_embedding_cache: (optional, defaults to `None` or not caching) A [`ByteStore`](/docs/integrations/stores/) for caching query embeddings, or `True` to use the same store as `document_embedding_cache`.\n",
"\n",
"**Attention**:\n",
"\n",
"- Be sure to set the `namespace` parameter to avoid collisions of the same text embedded using different embeddings models.\n",
"- Currently `CacheBackedEmbeddings` does not cache embedding created with `embed_query()` `aembed_query()` methods."
"- `CacheBackedEmbeddings` does not cache query embeddings by default. To enable query caching, one need to specify a `query_embedding_cache`."
]
},
{
@@ -123,7 +124,7 @@
"metadata": {},
"outputs": [],
"source": [
"raw_documents = TextLoader(\"../../state_of_the_union.txt\").load()\n",
"raw_documents = TextLoader(\"state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"documents = text_splitter.split_documents(raw_documents)"
]

View File

@@ -45,7 +45,7 @@
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"# Load an example document\n",
"with open(\"../../../docs/modules/state_of_the_union.txt\") as f:\n",
"with open(\"state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n",
"\n",
"text_splitter = CharacterTextSplitter(\n",

View File

@@ -7,21 +7,20 @@
"source": [
"# How to cache chat model responses\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [LLMs](/docs/concepts/#llms)\n",
"\n",
":::\n",
"\n",
"LangChain provides an optional caching layer for chat models. This is useful for two main reasons:\n",
"\n",
"- It can save you money by reducing the number of API calls you make to the LLM provider, if you're often requesting the same completion multiple times. This is especially useful during app development.\n",
"- It can speed up your application by reducing the number of API calls you make to the LLM provider.\n",
"\n",
"This guide will walk you through how to enable this in your apps.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [LLMs](/docs/concepts/#llms)\n",
"`} />\n",
"```"
"This guide will walk you through how to enable this in your apps."
]
},
{
@@ -267,7 +266,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -7,15 +7,14 @@
"source": [
"# How to track token usage in ChatModels\n",
"\n",
"Tracking token usage to calculate cost is an important part of putting your app in production. This guide goes over how to obtain this information from your LangChain model calls.\n",
":::info Prerequisites\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"`} />\n",
"```"
"\n",
":::\n",
"\n",
"Tracking token usage to calculate cost is an important part of putting your app in production. This guide goes over how to obtain this information from your LangChain model calls."
]
},
{
@@ -25,7 +24,7 @@
"source": [
"## Using AIMessage.response_metadata\n",
"\n",
"A number of model providers return token usage information as part of the chat generation response. When available, this is included in the [`AIMessage.response_metadata`](/docs/modules/model_io/chat/response_metadata/) field. Here's an example with OpenAI:"
"A number of model providers return token usage information as part of the chat generation response. When available, this is included in the [`AIMessage.response_metadata`](/docs/how_to/response_metadata) field. Here's an example with OpenAI:"
]
},
{
@@ -365,7 +364,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -142,7 +142,7 @@
"\n",
"## Chat history\n",
"\n",
"It's perfectly fine to store and pass messages directly as an array, but we can use LangChain's built-in [message history class](/docs/modules/memory/chat_messages/) to store and load messages as well. Instances of this class are responsible for storing and loading chat messages from persistent storage. LangChain integrates with many providers - you can see a [list of integrations here](/docs/integrations/memory) - but for this demo we will use an ephemeral demo class.\n",
"It's perfectly fine to store and pass messages directly as an array, but we can use LangChain's built-in [message history class](https://api.python.langchain.com/en/latest/langchain_api_reference.html#module-langchain.memory) to store and load messages as well. Instances of this class are responsible for storing and loading chat messages from persistent storage. LangChain integrates with many providers - you can see a [list of integrations here](/docs/integrations/memory) - but for this demo we will use an ephemeral demo class.\n",
"\n",
"Here's an example of the API:"
]

View File

@@ -15,7 +15,7 @@
"source": [
"# How to add retrieval to chatbots\n",
"\n",
"Retrieval is a common technique chatbots use to augment their responses with data outside a chat model's training data. This section will cover how to implement retrieval in the context of chatbots, but it's worth noting that retrieval is a very subtle and deep topic - we encourage you to explore [other parts of the documentation](/docs/use_cases/question_answering/) that go into greater depth!\n",
"Retrieval is a common technique chatbots use to augment their responses with data outside a chat model's training data. This section will cover how to implement retrieval in the context of chatbots, but it's worth noting that retrieval is a very subtle and deep topic - we encourage you to explore [other parts of the documentation](/docs/how_to#qa-with-rag) that go into greater depth!\n",
"\n",
"## Setup\n",
"\n",
@@ -80,7 +80,7 @@
"source": [
"## Creating a retriever\n",
"\n",
"We'll use [the LangSmith documentation](https://docs.smith.langchain.com/overview) as source material and store the content in a vectorstore for later retrieval. Note that this example will gloss over some of the specifics around parsing and storing a data source - you can see more [in-depth documentation on creating retrieval systems here](/docs/use_cases/question_answering/).\n",
"We'll use [the LangSmith documentation](https://docs.smith.langchain.com/overview) as source material and store the content in a vectorstore for later retrieval. Note that this example will gloss over some of the specifics around parsing and storing a data source - you can see more [in-depth documentation on creating retrieval systems here](/docs/how_to#qa-with-rag).\n",
"\n",
"Let's use a document loader to pull text from the docs:"
]
@@ -737,7 +737,7 @@
"source": [
"## Further reading\n",
"\n",
"This guide only scratches the surface of retrieval techniques. For more on different ways of ingesting, preparing, and retrieving the most relevant data, check out [this section](/docs/modules/data_connection/) of the docs."
"This guide only scratches the surface of retrieval techniques. For more on different ways of ingesting, preparing, and retrieving the most relevant data, check out the relevant how-to guides [here](/docs/how_to#document-loaders)."
]
}
],

View File

@@ -17,11 +17,11 @@
"\n",
"This section will cover how to create conversational agents: chatbots that can interact with other systems and APIs using tools.\n",
"\n",
"Before reading this guide, we recommend you read both [the chatbot quickstart](/docs/use_cases/chatbots/quickstart) in this section and be familiar with [the documentation on agents](/docs/tutorials/agents).\n",
"Before reading this guide, we recommend you read both [the chatbot quickstart](/docs/tutorials/chatbot) in this section and be familiar with [the documentation on agents](/docs/tutorials/agents).\n",
"\n",
"## Setup\n",
"\n",
"For this guide, we'll be using an [OpenAI tools agent](/docs/modules/agents/agent_types/openai_tools) with a single tool for searching the web. The default will be powered by [Tavily](/docs/integrations/tools/tavily_search), but you can switch it out for any similar tool. The rest of this section will assume you're using Tavily.\n",
"For this guide, we'll be using an [OpenAI tools agent](/docs/how_to/agent_executor) with a single tool for searching the web. The default will be powered by [Tavily](/docs/integrations/tools/tavily_search), but you can switch it out for any similar tool. The rest of this section will assume you're using Tavily.\n",
"\n",
"You'll need to [sign up for an account](https://tavily.com/) on the Tavily website, and install the following packages:"
]
@@ -437,7 +437,7 @@
"\n",
"Other types agents can also support conversational responses too - for more, check out the [agents section](/docs/tutorials/agents).\n",
"\n",
"For more on tool usage, you can also check out [this use case section](/docs/use_cases/tool_use/)."
"For more on tool usage, you can also check out [this use case section](/docs/how_to#tools)."
]
}
],

View File

@@ -18,23 +18,22 @@
"source": [
"# How to configure runtime chain internals\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"- [Binding runtime arguments](/docs/how_to/binding/)\n",
"\n",
":::\n",
"\n",
"Sometimes you may want to experiment with, or even expose to the end user, multiple different ways of doing things within your chains.\n",
"This can include tweaking parameters such as temperature or even swapping out one model for another.\n",
"In order to make this experience as easy as possible, we have defined two methods.\n",
"\n",
"- A `configurable_fields` method. This lets you configure particular fields of a runnable.\n",
" - This is related to the [`.bind`](/docs/how_to/binding) method on runnables, but allows you to specify parameters for a given step in a chain at runtime rather than specifying them beforehand.\n",
"- A `configurable_alternatives` method. With this method, you can list out alternatives for any particular runnable that can be set during runtime, and swap them for those specified alternatives.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"- [Binding runtime arguments](/docs/how_to/binding/)\n",
"`} />\n",
"```"
"- A `configurable_alternatives` method. With this method, you can list out alternatives for any particular runnable that can be set during runtime, and swap them for those specified alternatives."
]
},
{
@@ -613,7 +612,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -12,13 +12,12 @@
"Contextual compression is meant to fix this. The idea is simple: instead of immediately returning retrieved documents as-is, you can compress them using the context of the given query, so that only the relevant information is returned. “Compressing” here refers to both compressing the contents of an individual document and filtering out documents wholesale.\n",
"\n",
"To use the Contextual Compression Retriever, you'll need:\n",
"\n",
"- a base retriever\n",
"- a Document Compressor\n",
"\n",
"The Contextual Compression Retriever passes queries to the base retriever, takes the initial documents and passes them through the Document Compressor. The Document Compressor takes a list of documents and shortens it by reducing the contents of documents or dropping documents altogether.\n",
"\n",
"![](https://drive.google.com/uc?id=1CtNgWODXZudxAWSRiWgSGEoTNrUFT98v)\n",
"\n",
"## Get started"
]
},
@@ -51,8 +50,8 @@
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2b0be066",
"execution_count": 2,
"id": "25c26947-958d-4219-8ca0-daa3a51bd344",
"metadata": {},
"outputs": [
{
@@ -123,14 +122,12 @@
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"documents = TextLoader(\"../../state_of_the_union.txt\").load()\n",
"documents = TextLoader(\"state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(documents)\n",
"retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever()\n",
"\n",
"docs = retriever.get_relevant_documents(\n",
" \"What did the president say about Ketanji Brown Jackson\"\n",
")\n",
"docs = retriever.invoke(\"What did the president say about Ketanji Brown Jackson\")\n",
"pretty_print_docs(docs)"
]
},
@@ -145,24 +142,10 @@
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f08d19e6",
"execution_count": 3,
"id": "d83e3c63-bcde-43e9-998e-35bf2ebef49b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/harrisonchase/workplace/langchain/libs/langchain/langchain/chains/llm.py:316: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
" warnings.warn(\n",
"/Users/harrisonchase/workplace/langchain/libs/langchain/langchain/chains/llm.py:316: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
" warnings.warn(\n",
"/Users/harrisonchase/workplace/langchain/libs/langchain/langchain/chains/llm.py:316: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
" warnings.warn(\n",
"/Users/harrisonchase/workplace/langchain/libs/langchain/langchain/chains/llm.py:316: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
@@ -184,7 +167,7 @@
" base_compressor=compressor, base_retriever=retriever\n",
")\n",
"\n",
"compressed_docs = compression_retriever.get_relevant_documents(\n",
"compressed_docs = compression_retriever.invoke(\n",
" \"What did the president say about Ketanji Jackson Brown\"\n",
")\n",
"pretty_print_docs(compressed_docs)"
@@ -204,23 +187,9 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "6fa3ec79",
"id": "39b13654-01d9-4006-9550-5f3e77cb4f23",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/harrisonchase/workplace/langchain/libs/langchain/langchain/chains/llm.py:316: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
" warnings.warn(\n",
"/Users/harrisonchase/workplace/langchain/libs/langchain/langchain/chains/llm.py:316: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
" warnings.warn(\n",
"/Users/harrisonchase/workplace/langchain/libs/langchain/langchain/chains/llm.py:316: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
" warnings.warn(\n",
"/Users/harrisonchase/workplace/langchain/libs/langchain/langchain/chains/llm.py:316: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
@@ -245,7 +214,7 @@
" base_compressor=_filter, base_retriever=retriever\n",
")\n",
"\n",
"compressed_docs = compression_retriever.get_relevant_documents(\n",
"compressed_docs = compression_retriever.invoke(\n",
" \"What did the president say about Ketanji Jackson Brown\"\n",
")\n",
"pretty_print_docs(compressed_docs)"
@@ -264,7 +233,7 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "e84aceea",
"id": "ee8d9486-db9a-4e24-aa11-ae40f34cc908",
"metadata": {},
"outputs": [
{
@@ -293,21 +262,7 @@
"\n",
"Were putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
"\n",
"Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 3:\n",
"\n",
"And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \n",
"\n",
"As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n",
"\n",
"While it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \n",
"\n",
"And soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n",
"\n",
"So tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \n",
"\n",
"First, beat the opioid epidemic.\n"
"Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n"
]
}
],
@@ -321,7 +276,7 @@
" base_compressor=embeddings_filter, base_retriever=retriever\n",
")\n",
"\n",
"compressed_docs = compression_retriever.get_relevant_documents(\n",
"compressed_docs = compression_retriever.invoke(\n",
" \"What did the president say about Ketanji Jackson Brown\"\n",
")\n",
"pretty_print_docs(compressed_docs)"
@@ -340,7 +295,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "617a1756",
"metadata": {},
"outputs": [],
@@ -359,8 +314,8 @@
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c715228a",
"execution_count": 8,
"id": "40b9c1db-7ac2-4257-935a-b107da50bb43",
"metadata": {},
"outputs": [
{
@@ -398,7 +353,7 @@
" base_compressor=pipeline_compressor, base_retriever=retriever\n",
")\n",
"\n",
"compressed_docs = compression_retriever.get_relevant_documents(\n",
"compressed_docs = compression_retriever.invoke(\n",
" \"What did the president say about Ketanji Jackson Brown\"\n",
")\n",
"pretty_print_docs(compressed_docs)"
@@ -429,7 +384,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.10.4"
}
},
"nbformat": 4,

View File

@@ -7,20 +7,19 @@
"source": [
"# How to create a custom chat model class\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"\n",
":::\n",
"\n",
"In this guide, we'll learn how to create a custom chat model using LangChain abstractions.\n",
"\n",
"Wrapping your LLM with the standard [`BaseChatModel`](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.chat_models.BaseChatModel.html) interface allow you to use your LLM in existing LangChain programs with minimal code modifications!\n",
"\n",
"As an bonus, your LLM will automatically become a LangChain `Runnable` and will benefit from some optimizations out of the box (e.g., batch via a threadpool), async support, the `astream_events` API, etc.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"`} />\n",
"```\n",
"\n",
"## Inputs and outputs\n",
"\n",
"First, we need to talk about **messages**, which are the inputs and outputs of chat models.\n",
@@ -562,7 +561,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -38,7 +38,7 @@
"The logic inside of `_get_relevant_documents` can involve arbitrary calls to a database or to the web using requests.\n",
"\n",
":::{.callout-tip}\n",
"By inherting from `BaseRetriever`, your retriever automatically becomes a LangChain [Runnable](/docs/expression_language/interface) and will gain the standard `Runnable` functionality out of the box!\n",
"By inherting from `BaseRetriever`, your retriever automatically becomes a LangChain [Runnable](/docs/concepts#interface) and will gain the standard `Runnable` functionality out of the box!\n",
":::\n",
"\n",
"\n",

View File

@@ -11,7 +11,7 @@
"\n",
"This covers how to load `HTML` documents into a LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
"\n",
"Parsing HTML files often requires specialized tools. Here we demonstrate parsing via [Unstructured](https://unstructured-io.github.io/unstructured/) and [BeautifulSoup4](https://beautiful-soup-4.readthedocs.io/en/latest/), which can be installed via pip. Head over to the integrations page to find integrations with additional services, such as [Azure AI Document Intelligence](/docs/0.2.x/integrations/document_loaders/azure_document_intelligence) or [FireCrawl](/docs/0.2.x/integrations/document_loaders/firecrawl).\n",
"Parsing HTML files often requires specialized tools. Here we demonstrate parsing via [Unstructured](https://unstructured-io.github.io/unstructured/) and [BeautifulSoup4](https://beautiful-soup-4.readthedocs.io/en/latest/), which can be installed via pip. Head over to the integrations page to find integrations with additional services, such as [Azure AI Document Intelligence](/docs/integrations/document_loaders/azure_document_intelligence) or [FireCrawl](/docs/integrations/document_loaders/firecrawl).\n",
"\n",
"## Loading HTML with Unstructured"
]
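A minimal sketch of loading with `UnstructuredHTMLLoader` might look like the following, assuming the `unstructured` package is installed; `"example.html"` is a placeholder path:

```python
from langchain_community.document_loaders import UnstructuredHTMLLoader

# "example.html" is a placeholder; point it at any local HTML file.
loader = UnstructuredHTMLLoader("example.html")
docs = loader.load()

print(docs[0].page_content[:100])  # first characters of the extracted text
print(docs[0].metadata)            # e.g. the source file path
```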

View File

@@ -17,23 +17,22 @@
"source": [
"# How to use few shot examples\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Example selectors](/docs/concepts/#example-selectors)\n",
"- [LLMs](/docs/concepts/#llms)\n",
"- [Vectorstores](/docs/concepts/#vectorstores)\n",
"\n",
":::\n",
"\n",
"In this guide, we'll learn how to create a simple prompt template that provides the model with example inputs and outputs when generating. Providing the LLM with a few such examples is called few-shotting, and is a simple yet powerful way to guide generation and in some cases drastically improve model performance.\n",
"\n",
"A few-shot prompt template can be constructed from either a set of examples, or from an [Example Selector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.base.BaseExampleSelector.html) class responsible for choosing a subset of examples from the defined set.\n",
"\n",
"This guide will cover few-shotting with string prompt templates. For a guide on few-shotting with chat messages for chat models, see [here](/docs/how_to/few_shot_examples_chat/).\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Example selectors](/docs/concepts/#example-selectors)\n",
"- [LLMs](/docs/concepts/#llms)\n",
"- [Vectorstores](/docs/concepts/#vectorstores)\n",
"`} />\n",
"```\n",
"\n",
"## Create a formatter for the few-shot examples\n",
"\n",
"Configure a formatter that will format the few-shot examples into a string. This formatter should be a `PromptTemplate` object."
@@ -390,7 +389,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -17,24 +17,23 @@
"source": [
"# How to use few shot examples in chat models\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Example selectors](/docs/concepts/#example-selectors)\n",
"- [Chat models](/docs/concepts/#chat-model)\n",
"- [Vectorstores](/docs/concepts/#vectorstores)\n",
"\n",
":::\n",
"\n",
"This guide covers how to prompt a chat model with example inputs and outputs. Providing the model with a few such examples is called few-shotting, and is a simple yet powerful way to guide generation and in some cases drastically improve model performance.\n",
"\n",
"There does not appear to be solid consensus on how best to do few-shot prompting, and the optimal prompt compilation will likely vary by model. Because of this, we provide few-shot prompt templates like the [FewShotChatMessagePromptTemplate](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate.html?highlight=fewshot#langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate) as a flexible starting point, and you can modify or replace them as you see fit.\n",
"\n",
"The goal of few-shot prompt templates are to dynamically select examples based on an input, and then format the examples in a final prompt to provide for the model.\n",
"\n",
"**Note:** The following code examples are for chat models only, since `FewShotChatMessagePromptTemplates` are designed to output formatted [chat messages](/docs/concepts/#message-types) rather than pure strings. For similar few-shot prompt examples for pure string templates compatible with completion models (LLMs), see the [few-shot prompt templates](/docs/how_to/few_shot_examples/) guide.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Example selectors](/docs/concepts/#example-selectors)\n",
"- [Chat models](/docs/concepts/#chat-model)\n",
"- [Vectorstores](/docs/concepts/#vectorstores)\n",
"`} />\n",
"```"
"**Note:** The following code examples are for chat models only, since `FewShotChatMessagePromptTemplates` are designed to output formatted [chat messages](/docs/concepts/#message-types) rather than pure strings. For similar few-shot prompt examples for pure string templates compatible with completion models (LLMs), see the [few-shot prompt templates](/docs/how_to/few_shot_examples/) guide."
]
},
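As a quick sketch of the pattern this guide develops, a `FewShotChatMessagePromptTemplate` can be composed into a chat prompt roughly as follows; the example pairs are illustrative only:

```python
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

# Hypothetical examples for illustration.
examples = [
    {"input": "2 + 2", "output": "4"},
    {"input": "2 + 3", "output": "5"},
]

# How each example is rendered as chat messages.
example_prompt = ChatPromptTemplate.from_messages(
    [("human", "{input}"), ("ai", "{output}")]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a wondrous wizard of math."),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)

print(final_prompt.invoke({"input": "What is 2 + 9?"}).to_messages())
```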
{
@@ -435,7 +434,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -48,7 +48,7 @@
"receive the tool call, execute it, and return the output to the LLM to inform its \n",
"response. LangChain includes a suite of [built-in tools](/docs/integrations/tools/) \n",
"and supports several methods for defining your own [custom tools](/docs/how_to/custom_tools). \n",
"Tool-calling is extremely useful for building [tool-using chains and agents](/docs/use_cases/tool_use), \n",
"Tool-calling is extremely useful for building [tool-using chains and agents](/docs/how_to#tools), \n",
"and for getting structured outputs from models more generally.\n",
"\n",
"Providers adopt different conventions for formatting tool schemas and tool calls. \n",
@@ -262,7 +262,7 @@
"are populated in the `.invalid_tool_calls` attribute. An `InvalidToolCall` can have \n",
"a name, string arguments, identifier, and error message.\n",
"\n",
"If desired, [output parsers](/docs/modules/model_io/output_parsers) can further \n",
"If desired, [output parsers](/docs/how_to#output-parsers) can further \n",
"process the output. For example, we can convert back to the original Pydantic class:"
]
},
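For orientation, a minimal sketch of producing such tool calls in the first place: bind a Pydantic schema to a tool-calling chat model and read the `.tool_calls` attribute of the response. `ChatOpenAI` and the `Multiply` schema are illustrative choices and assume `langchain-openai` is installed with an API key configured.

```python
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI


class Multiply(BaseModel):
    """Multiply two integers together."""

    a: int = Field(..., description="First integer")
    b: int = Field(..., description="Second integer")


llm = ChatOpenAI(model="gpt-3.5-turbo-0125")  # any tool-calling chat model works
llm_with_tools = llm.bind_tools([Multiply])

msg = llm_with_tools.invoke("What is 3 * 12?")
print(msg.tool_calls)  # e.g. [{"name": "Multiply", "args": {"a": 3, "b": 12}, ...}]
```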
@@ -351,7 +351,7 @@
"id": "55046320-3466-4ec1-a1f8-336234ba9019",
"metadata": {},
"source": [
"Note that adding message chunks will merge their corresponding tool call chunks. This is the principle by which LangChain's various [tool output parsers](/docs/modules/model_io/output_parsers/types/openai_tools/) support streaming.\n",
"Note that adding message chunks will merge their corresponding tool call chunks. This is the principle by which LangChain's various [tool output parsers](/docs/how_to/output_parser_structured) support streaming.\n",
"\n",
"For example, below we accumulate tool call chunks:"
]
@@ -669,16 +669,14 @@
"## Next steps\n",
"\n",
"- **Output parsing**: See [OpenAI Tools output\n",
" parsers](/docs/modules/model_io/output_parsers/types/openai_tools/)\n",
" and [OpenAI Functions output\n",
" parsers](/docs/modules/model_io/output_parsers/types/openai_functions/)\n",
" parsers](/docs/how_to/output_parser_structured)\n",
" to learn about extracting the function calling API responses into\n",
" various formats.\n",
"- **Structured output chains**: [Some models have constructors](/docs/how_to/structured_output) that\n",
" handle creating a structured output chain for you.\n",
"- **Tool use**: See how to construct chains and agents that\n",
" call the invoked tools in [these\n",
" guides](/docs/use_cases/tool_use/)."
" guides](/docs/how_to#tools)."
]
}
],
@@ -698,7 +696,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -18,6 +18,14 @@
"source": [
"# How to run custom functions\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"\n",
":::\n",
"\n",
"You can use arbitrary functions as [Runnables](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable). This is useful for formatting or when you need functionality not provided by other LangChain components, and custom functions used as Runnables are called [`RunnableLambdas`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableLambda.html).\n",
"\n",
"Note that all inputs to these functions need to be a SINGLE argument. If you have a function that accepts multiple arguments, you should write a wrapper that accepts a single dict input and unpacks it into multiple argument.\n",
@@ -29,15 +37,6 @@
"- How to accept and use run metadata in your custom function\n",
"- How to stream with custom functions by having them return generators\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"`} />\n",
"```\n",
"\n",
"## Using the constructor\n",
"\n",
"Below, we explicitly wrap our custom logic using the `RunnableLambda` constructor:"
@@ -526,7 +525,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,392 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "14d3fd06",
"metadata": {
"id": "14d3fd06"
},
"source": [
"# Hybrid Search\n",
"\n",
"The standard search in LangChain is done by vector similarity. However, a number of vectorstores implementations (Astra DB, ElasticSearch, Neo4J, AzureSearch, ...) also support more advanced search combining vector similarity search and other search techniques (full-text, BM25, and so on). This is generally referred to as \"Hybrid\" search.\n",
"\n",
"**Step 1: Make sure the vectorstore you are using supports hybrid search**\n",
"\n",
"At the moment, there is no unified way to perform hybrid search in LangChain. Each vectorstore may have their own way to do it. This is generally exposed as a keyword argument that is passed in during `similarity_search`. By reading the documentation or source code, figure out whether the vectorstore you are using supports hybrid search, and, if so, how to use it.\n",
"\n",
"**Step 2: Add that parameter as a configurable field for the chain**\n",
"\n",
"This will let you easily call the chain and configure any relevant flags at runtime. See [this documentation](/docs/how_to/configure) for more information on configuration.\n",
"\n",
"**Step 3: Call the chain with that configurable field**\n",
"\n",
"Now, at runtime you can call this chain with configurable field.\n",
"\n",
"## Code Example\n",
"\n",
"Let's see a concrete example of what this looks like in code. We will use the Cassandra/CQL interface of Astra DB for this example.\n",
"\n",
"Install the following Python package:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2efe35eea197769",
"metadata": {
"id": "c2efe35eea197769",
"outputId": "527275b4-076e-4b22-945c-e41a59188116"
},
"outputs": [],
"source": [
"!pip install \"cassio>=0.1.7\""
]
},
{
"cell_type": "markdown",
"id": "b4ef96d44341cd84",
"metadata": {
"collapsed": false,
"id": "b4ef96d44341cd84"
},
"source": [
"Get the [connection secrets](https://docs.datastax.com/en/astra/astra-db-vector/get-started/quickstart.html).\n",
"\n",
"Initialize cassio:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb2cef097277c32e",
"metadata": {
"id": "cb2cef097277c32e",
"outputId": "4c3d05a0-319a-44a0-8ec3-0a9c78453132"
},
"outputs": [],
"source": [
"import cassio\n",
"\n",
"cassio.init(\n",
" database_id=\"Your database ID\",\n",
" token=\"Your application token\",\n",
" keyspace=\"Your key space\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e1e51444877f45eb",
"metadata": {
"collapsed": false,
"id": "e1e51444877f45eb"
},
"source": [
"Create the Cassandra VectorStore with a standard [index analyzer](https://docs.datastax.com/en/astra/astra-db-vector/cql/use-analyzers-with-cql.html). The index analyzer is needed to enable term matching."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7345de3c",
"metadata": {
"id": "7345de3c",
"outputId": "d38bcee0-0134-4ac6-8d35-afcce282481b"
},
"outputs": [],
"source": [
"from cassio.table.cql import STANDARD_ANALYZER\n",
"from langchain_community.vectorstores import Cassandra\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings()\n",
"vectorstore = Cassandra(\n",
" embedding=embeddings,\n",
" table_name=\"test_hybrid\",\n",
" body_index_options=[STANDARD_ANALYZER],\n",
" session=None,\n",
" keyspace=None,\n",
")\n",
"\n",
"vectorstore.add_texts(\n",
" [\n",
" \"In 2023, I visited Paris\",\n",
" \"In 2022, I visited New York\",\n",
" \"In 2021, I visited New Orleans\",\n",
" ]\n",
")"
]
},
{
"cell_type": "markdown",
"id": "73887f23bbab978c",
"metadata": {
"collapsed": false,
"id": "73887f23bbab978c"
},
"source": [
"If we do a standard similarity search, we get all the documents:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c2a39fa",
"metadata": {
"id": "3c2a39fa",
"outputId": "5290085b-896c-4c81-9b40-c315331b7009"
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='In 2022, I visited New York'),\n",
"Document(page_content='In 2023, I visited Paris'),\n",
"Document(page_content='In 2021, I visited New Orleans')]"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vectorstore.as_retriever().invoke(\"What city did I visit last?\")"
]
},
{
"cell_type": "markdown",
"id": "78d4c3c79e67d8c3",
"metadata": {
"collapsed": false,
"id": "78d4c3c79e67d8c3"
},
"source": [
"The Astra DB vectorstore `body_search` argument can be used to filter the search on the term `new`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56393baa",
"metadata": {
"id": "56393baa",
"outputId": "d1c939f3-342f-4df4-94a3-d25429b5a25e"
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='In 2022, I visited New York'),\n",
"Document(page_content='In 2021, I visited New Orleans')]"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vectorstore.as_retriever(search_kwargs={\"body_search\": \"new\"}).invoke(\n",
" \"What city did I visit last?\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "88ae97ed",
"metadata": {
"id": "88ae97ed"
},
"source": [
"We can now create the chain that we will use to do question-answering over"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "62707b4f",
"metadata": {
"id": "62707b4f"
},
"outputs": [],
"source": [
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import (\n",
" ConfigurableField,\n",
" RunnablePassthrough,\n",
")\n",
"from langchain_openai import ChatOpenAI"
]
},
{
"cell_type": "markdown",
"id": "b6778ffa",
"metadata": {
"id": "b6778ffa"
},
"source": [
"This is basic question-answering chain set up."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44a865f6",
"metadata": {
"id": "44a865f6"
},
"outputs": [],
"source": [
"template = \"\"\"Answer the question based only on the following context:\n",
"{context}\n",
"Question: {question}\n",
"\"\"\"\n",
"prompt = ChatPromptTemplate.from_template(template)\n",
"\n",
"model = ChatOpenAI()\n",
"\n",
"retriever = vectorstore.as_retriever()"
]
},
{
"cell_type": "markdown",
"id": "72125166",
"metadata": {
"id": "72125166"
},
"source": [
"Here we mark the retriever as having a configurable field. All vectorstore retrievers have `search_kwargs` as a field. This is just a dictionary, with vectorstore specific fields"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "babbadff",
"metadata": {
"id": "babbadff"
},
"outputs": [],
"source": [
"configurable_retriever = retriever.configurable_fields(\n",
" search_kwargs=ConfigurableField(\n",
" id=\"search_kwargs\",\n",
" name=\"Search Kwargs\",\n",
" description=\"The search kwargs to use\",\n",
" )\n",
")"
]
},
{
"cell_type": "markdown",
"id": "2d481b70",
"metadata": {
"id": "2d481b70"
},
"source": [
"We can now create the chain using our configurable retriever"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "210b0446",
"metadata": {
"id": "210b0446"
},
"outputs": [],
"source": [
"chain = (\n",
" {\"context\": configurable_retriever, \"question\": RunnablePassthrough()}\n",
" | prompt\n",
" | model\n",
" | StrOutputParser()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a38037b2",
"metadata": {
"id": "a38037b2",
"outputId": "1ea14996-5965-4a5e-9678-b9c35ce5c6de"
},
"outputs": [
{
"data": {
"text/plain": [
"Paris"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke(\"What city did I visit last?\")"
]
},
{
"cell_type": "markdown",
"id": "7f6458c3",
"metadata": {
"id": "7f6458c3"
},
"source": [
"We can now invoke the chain with configurable options. `search_kwargs` is the id of the configurable field. The value is the search kwargs to use for Astra DB."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9gYLqBTH8BFz",
"metadata": {
"id": "9gYLqBTH8BFz",
"outputId": "4358a2e6-f306-48f1-dd5c-781ac8a33e89"
},
"outputs": [
{
"data": {
"text/plain": [
"New York"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke(\n",
" \"What city did I visit last?\",\n",
" config={\"configurable\": {\"search_kwargs\": {\"body_search\": \"new\"}}},\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -3,168 +3,176 @@ sidebar_position: 0
sidebar_class_name: hidden
---
# How-to Guides
# How-to guides
Here you'll find short answers to “How do I….?” types of questions.
These how-to guides don't cover topics in depth; you'll find that material in the [Tutorials](/docs/tutorials) and the [API Reference](https://api.python.langchain.com/en/latest/).
However, these guides will help you quickly accomplish common tasks.
Here you'll find answers to “How do I….?” types of questions.
These guides are *goal-oriented* and *concrete*; they're meant to help you complete a specific task.
For conceptual explanations see [Conceptual Guides](/docs/concepts/).
For end-to-end walkthroughs see [Tutorials](/docs/tutorials).
For comprehensive descriptions of every class and function see [API Reference](https://api.python.langchain.com/en/latest/).
## Core Functionality
## Key features
This covers functionality that is core to using LangChain
This highlights functionality that is core to using LangChain.
- [How to return structured data from an LLM](/docs/how_to/structured_output/)
- [How to use a chat model to call tools](/docs/how_to/tool_calling/)
- [How to stream](/docs/how_to/streaming)
- [How to debug your LLM apps](/docs/how_to/debugging/)
- [How to: return structured data from an LLM](/docs/how_to/structured_output/)
- [How to: use a chat model to call tools](/docs/how_to/tool_calling/)
- [How to: stream runnables](/docs/how_to/streaming)
- [How to: debug your LLM apps](/docs/how_to/debugging/)
## LangChain Expression Language (LCEL)
LangChain Expression Language a way to create arbitrary custom chains.
LangChain Expression Language is a way to create arbitrary custom chains. It is built on the Runnable protocol.
- [How to combine multiple runnables into a chain](/docs/how_to/sequence)
- [How to invoke runnables in parallel](/docs/how_to/parallel/)
- [How to attach runtime arguments to a runnable](/docs/how_to/binding/)
- [How to run custom functions](/docs/how_to/functions)
- [How to pass through arguments from one step to the next](/docs/how_to/passthrough)
- [How to add values to a chain's state](/docs/how_to/assign)
- [How to configure a chain at runtime](/docs/how_to/configure)
- [How to add message history](/docs/how_to/message_history)
- [How to route execution within a chain](/docs/how_to/routing)
- [How to inspect your runnables](/docs/how_to/inspect)
- [How to add fallbacks](/docs/how_to/fallbacks)
- [How to: chain runnables](/docs/how_to/sequence)
- [How to: stream runnables](/docs/how_to/streaming)
- [How to: invoke runnables in parallel](/docs/how_to/parallel/)
- [How to: attach runtime arguments to a runnable](/docs/how_to/binding/)
- [How to: run custom functions](/docs/how_to/functions)
- [How to: pass through arguments from one step to the next](/docs/how_to/passthrough)
- [How to: add values to a chain's state](/docs/how_to/assign)
- [How to: configure a chain at runtime](/docs/how_to/configure)
- [How to: add message history](/docs/how_to/message_history)
- [How to: route execution within a chain](/docs/how_to/routing)
- [How to: inspect runnables](/docs/how_to/inspect)
- [How to: add fallbacks](/docs/how_to/fallbacks)
## Components
These are the core building blocks you can use when building applications.
### Prompt Templates
### Prompt templates
Prompt Templates are responsible for formatting user input into a format that can be passed to a language model.
- [How to use few shot examples](/docs/how_to/few_shot_examples)
- [How to use few shot examples in chat models](/docs/how_to/few_shot_examples_chat/)
- [How to partially format prompt templates](/docs/how_to/prompts_partial)
- [How to compose prompts together](/docs/how_to/prompts_composition)
- [How to: use few shot examples](/docs/how_to/few_shot_examples)
- [How to: use few shot examples in chat models](/docs/how_to/few_shot_examples_chat/)
- [How to: partially format prompt templates](/docs/how_to/prompts_partial)
- [How to: compose prompts together](/docs/how_to/prompts_composition)
### Example Selectors
### Example selectors
Example Selectors are responsible for selecting the correct few shot examples to pass to the prompt.
- [How to use example selectors](/docs/how_to/example_selectors)
- [How to select examples by length](/docs/how_to/example_selectors_length_based)
- [How to select examples by semantic similarity](/docs/how_to/example_selectors_similarity)
- [How to select examples by semantic ngram overlap](/docs/how_to/example_selectors_ngram)
- [How to select examples by maximal marginal relevance](/docs/how_to/example_selectors_mmr)
- [How to: use example selectors](/docs/how_to/example_selectors)
- [How to: select examples by length](/docs/how_to/example_selectors_length_based)
- [How to: select examples by semantic similarity](/docs/how_to/example_selectors_similarity)
- [How to: select examples by semantic ngram overlap](/docs/how_to/example_selectors_ngram)
- [How to: select examples by maximal marginal relevance](/docs/how_to/example_selectors_mmr)
### Chat Models
### Chat models
Chat Models are newer forms of language models that take messages in and output a message.
- [How to do function/tool calling](/docs/how_to/tool_calling)
- [How to get models to return structured output](/docs/how_to/structured_output)
- [How to cache model responses](/docs/how_to/chat_model_caching)
- [How to get log probabilities from model calls](/docs/how_to/logprobs)
- [How to create a custom chat model class](/docs/how_to/custom_chat_model)
- [How to stream a response back](/docs/how_to/chat_streaming)
- [How to track token usage](/docs/how_to/chat_token_usage_tracking)
- [How to: do function/tool calling](/docs/how_to/tool_calling)
- [How to: get models to return structured output](/docs/how_to/structured_output)
- [How to: cache model responses](/docs/how_to/chat_model_caching)
- [How to: get log probabilities](/docs/how_to/logprobs)
- [How to: create a custom chat model class](/docs/how_to/custom_chat_model)
- [How to: stream a response back](/docs/how_to/chat_streaming)
- [How to: track token usage](/docs/how_to/chat_token_usage_tracking)
- [How to: track response metadata across providers](/docs/how_to/response_metadata)
### LLMs
What LangChain calls LLMs are older forms of language models that take a string in and output a string.
- [How to cache model responses](/docs/how_to/llm_caching)
- [How to create a custom LLM class](/docs/how_to/custom_llm)
- [How to stream a response back](/docs/how_to/streaming_llm)
- [How to track token usage](/docs/how_to/llm_token_usage_tracking)
- [How to: cache model responses](/docs/how_to/llm_caching)
- [How to: create a custom LLM class](/docs/how_to/custom_llm)
- [How to: stream a response back](/docs/how_to/streaming_llm)
- [How to: track token usage](/docs/how_to/llm_token_usage_tracking)
- [How to: work with local LLMs](/docs/how_to/local_llms)
### Output Parsers
### Output parsers
Output Parsers are responsible for taking the output of an LLM and parsing it into a more structured format.
- [How to use output parsers to parse an LLM response into structured format](/docs/how_to/output_parser_structured)
- [How to parse JSON output](/docs/how_to/output_parser_json)
- [How to parse XML output](/docs/how_to/output_parser_xml)
- [How to parse YAML output](/docs/how_to/output_parser_yaml)
- [How to retry when output parsing errors occur](/docs/how_to/output_parser_retry)
- [How to try to fix errors in output parsing](/docs/how_to/output_parser_fixing)
- [How to write a custom output parser class](/docs/how_to/output_parser_custom)
- [How to: use output parsers to parse an LLM response into structured format](/docs/how_to/output_parser_structured)
- [How to: parse JSON output](/docs/how_to/output_parser_json)
- [How to: parse XML output](/docs/how_to/output_parser_xml)
- [How to: parse YAML output](/docs/how_to/output_parser_yaml)
- [How to: retry when output parsing errors occur](/docs/how_to/output_parser_retry)
- [How to: try to fix errors in output parsing](/docs/how_to/output_parser_fixing)
- [How to: write a custom output parser class](/docs/how_to/output_parser_custom)
### Document Loaders
### Document loaders
Document Loaders are responsible for loading documents from a variety of sources.
- [How to load CSV data](/docs/how_to/document_loader_csv)
- [How to load data from a directory](/docs/how_to/document_loader_directory)
- [How to load HTML data](/docs/how_to/document_loader_html)
- [How to load JSON data](/docs/how_to/document_loader_json)
- [How to load Markdown data](/docs/how_to/document_loader_markdown)
- [How to load Microsoft Office data](/docs/how_to/document_loader_office_file)
- [How to load PDF files](/docs/how_to/document_loader_pdf)
- [How to write a custom document loader](/docs/how_to/document_loader_custom)
- [How to: load CSV data](/docs/how_to/document_loader_csv)
- [How to: load data from a directory](/docs/how_to/document_loader_directory)
- [How to: load HTML data](/docs/how_to/document_loader_html)
- [How to: load JSON data](/docs/how_to/document_loader_json)
- [How to: load Markdown data](/docs/how_to/document_loader_markdown)
- [How to: load Microsoft Office data](/docs/how_to/document_loader_office_file)
- [How to: load PDF files](/docs/how_to/document_loader_pdf)
- [How to: write a custom document loader](/docs/how_to/document_loader_custom)
### Text Splitters
### Text splitters
Text Splitters take a document and split it into chunks that can be used for retrieval.
- [How to recursively split text](/docs/how_to/recursive_text_splitter)
- [How to split by HTML headers](/docs/how_to/HTML_header_metadata_splitter)
- [How to split by HTML sections](/docs/how_to/HTML_section_aware_splitter)
- [How to split by character](/docs/how_to/character_text_splitter)
- [How to split code](/docs/how_to/code_splitter)
- [How to split Markdown by headers](/docs/how_to/markdown_header_metadata_splitter)
- [How to recursively split JSON](/docs/how_to/recursive_json_splitter)
- [How to split text into semantic chunks](/docs/how_to/semantic-chunker)
- [How to split by tokens](/docs/how_to/split_by_token)
- [How to: recursively split text](/docs/how_to/recursive_text_splitter)
- [How to: split by HTML headers](/docs/how_to/HTML_header_metadata_splitter)
- [How to: split by HTML sections](/docs/how_to/HTML_section_aware_splitter)
- [How to: split by character](/docs/how_to/character_text_splitter)
- [How to: split code](/docs/how_to/code_splitter)
- [How to: split Markdown by headers](/docs/how_to/markdown_header_metadata_splitter)
- [How to: recursively split JSON](/docs/how_to/recursive_json_splitter)
- [How to: split text into semantic chunks](/docs/how_to/semantic-chunker)
- [How to: split by tokens](/docs/how_to/split_by_token)
### Embedding Models
### Embedding models
Embedding Models take a piece of text and create a numerical representation of it.
- [How to embed text data](/docs/how_to/embed_text)
- [How to cache embedding results](/docs/how_to/caching_embeddings)
- [How to: embed text data](/docs/how_to/embed_text)
- [How to: cache embedding results](/docs/how_to/caching_embeddings)
### Vector Stores
### Vector stores
Vector Stores are databases that can efficiently store and retrieve embeddings.
Vector stores are databases that can efficiently store and retrieve embeddings.
- [How to use a vector store to retrieve data](/docs/how_to/vectorstores)
- [How to: use a vector store to retrieve data](/docs/how_to/vectorstores)
### Retrievers
Retrievers are responsible for taking a query and returning relevant documents.
- [How to use a vector store to retrieve data](/docs/how_to/vectorstore_retriever)
- [How to generate multiple queries to retrieve data for](/docs/how_to/MultiQueryRetriever)
- [How to use contextual compression to compress the data retrieved](/docs/how_to/contextual_compression)
- [How to write a custom retriever class](/docs/how_to/custom_retriever)
- [How to combine the results from multiple retrievers](/docs/how_to/ensemble_retriever)
- [How to reorder retrieved results to put most relevant documents not in the middle](/docs/how_to/long_context_reorder)
- [How to generate multiple embeddings per document](/docs/how_to/multi_vector)
- [How to retrieve the whole document for a chunk](/docs/how_to/parent_document_retriever)
- [How to generate metadata filters](/docs/how_to/self_query)
- [How to create a time-weighted retriever](/docs/how_to/time_weighted_vectorstore)
- [How to: use a vector store to retrieve data](/docs/how_to/vectorstore_retriever)
- [How to: generate multiple queries to retrieve data for](/docs/how_to/MultiQueryRetriever)
- [How to: use contextual compression to compress the data retrieved](/docs/how_to/contextual_compression)
- [How to: write a custom retriever class](/docs/how_to/custom_retriever)
- [How to: add similarity scores to retriever results](/docs/how_to/add_scores_retriever)
- [How to: combine the results from multiple retrievers](/docs/how_to/ensemble_retriever)
- [How to: reorder retrieved results to put most relevant documents not in the middle](/docs/how_to/long_context_reorder)
- [How to: generate multiple embeddings per document](/docs/how_to/multi_vector)
- [How to: retrieve the whole document for a chunk](/docs/how_to/parent_document_retriever)
- [How to: generate metadata filters](/docs/how_to/self_query)
- [How to: create a time-weighted retriever](/docs/how_to/time_weighted_vectorstore)
- [How to: use hybrid vector and keyword retrieval](/docs/how_to/hybrid)
### Indexing
Indexing is the process of keeping your vectorstore in-sync with the underlying data source.
- [How to reindex data to keep your vectorstore in-sync with the underlying data source](/docs/how_to/indexing)
- [How to: reindex data to keep your vectorstore in-sync with the underlying data source](/docs/how_to/indexing)
### Tools
LangChain Tools contain a description of the tool (to pass to the language model) as well as the implementation of the function to call.
- [How to use LangChain tools](/docs/how_to/tools)
- [How to use a chat model to call tools](/docs/how_to/tool_calling/)
- [How to use LangChain toolkits](/docs/how_to/toolkits)
- [How to define a custom tool](/docs/how_to/custom_tools)
- [How to convert LangChain tools to OpenAI functions](/docs/how_to/tools_as_openai_functions)
- [How to use tools without function calling](/docs/how_to/tools_prompting)
- [How to let the LLM choose between multiple tools](/docs/how_to/tools_multiple)
- [How to add a human in the loop to tool usage](/docs/how_to/tools_human)
- [How to do parallel tool use](/docs/how_to/tools_parallel)
- [How to handle errors when calling tools](/docs/how_to/tools_error)
- [How to: use LangChain tools](/docs/how_to/tools)
- [How to: use a chat model to call tools](/docs/how_to/tool_calling/)
- [How to: use LangChain toolkits](/docs/how_to/toolkits)
- [How to: define a custom tool](/docs/how_to/custom_tools)
- [How to: convert LangChain tools to OpenAI functions](/docs/how_to/tools_as_openai_functions)
- [How to: use tools without function calling](/docs/how_to/tools_prompting)
- [How to: let the LLM choose between multiple tools](/docs/how_to/tools_multiple)
- [How to: add a human in the loop to tool usage](/docs/how_to/tools_human)
- [How to: do parallel tool use](/docs/how_to/tools_parallel)
- [How to: handle errors when calling tools](/docs/how_to/tools_error)
- [How to: call tools using multi-modal data](/docs/how_to/tool_calls_multi_modal)
### Agents
@@ -174,25 +182,22 @@ For in depth how-to guides for agents, please check out [LangGraph](https://gith
:::
- [How to use legacy LangChain Agents (AgentExecutor)](/docs/how_to/agent_executor)
- [How to migrate from legacy LangChain agents to LangGraph](/docs/how_to/migrate_agent)
- [How to: use legacy LangChain Agents (AgentExecutor)](/docs/how_to/agent_executor)
- [How to: migrate from legacy LangChain agents to LangGraph](/docs/how_to/migrate_agent)
### Custom
All of LangChain's components can easily be extended to support your own versions.
- [How to create a custom chat model class](/docs/how_to/custom_chat_model)
- [How to create a custom LLM class](/docs/how_to/custom_llm)
- [How to write a custom retriever class](/docs/how_to/custom_retriever)
- [How to write a custom document loader](/docs/how_to/document_loader_custom)
- [How to write a custom output parser class](/docs/how_to/output_parser_custom)
- [How to define a custom tool](/docs/how_to/custom_tools)
- [How to: create a custom chat model class](/docs/how_to/custom_chat_model)
- [How to: create a custom LLM class](/docs/how_to/custom_llm)
- [How to: write a custom retriever class](/docs/how_to/custom_retriever)
- [How to: write a custom document loader](/docs/how_to/document_loader_custom)
- [How to: write a custom output parser class](/docs/how_to/output_parser_custom)
- [How to: define a custom tool](/docs/how_to/custom_tools)
## Use Cases
## Use cases
These guides cover use-case specific details.
@@ -200,54 +205,54 @@ These guides cover use-case specific details.
Retrieval Augmented Generation (RAG) is a way to connect LLMs to external sources of data.
- [How to add chat history](/docs/how_to/qa_chat_history_how_to/)
- [How to stream](/docs/how_to/qa_streaming/)
- [How to return sources](/docs/how_to/qa_sources/)
- [How to return citations](/docs/how_to/qa_citations/)
- [How to do per-user retrieval](/docs/how_to/qa_per_user/)
- [How to: add chat history](/docs/how_to/qa_chat_history_how_to/)
- [How to: stream](/docs/how_to/qa_streaming/)
- [How to: return sources](/docs/how_to/qa_sources/)
- [How to: return citations](/docs/how_to/qa_citations/)
- [How to: do per-user retrieval](/docs/how_to/qa_per_user/)
### Extraction
Extraction is when you use LLMs to extract structured information from unstructured text.
- [How to use reference examples](/docs/how_to/extraction_examples/)
- [How to handle long text](/docs/how_to/extraction_long_text/)
- [How to do extraction without using function calling](/docs/how_to/extraction_parse)
- [How to: use reference examples](/docs/how_to/extraction_examples/)
- [How to: handle long text](/docs/how_to/extraction_long_text/)
- [How to: do extraction without using function calling](/docs/how_to/extraction_parse)
### Chatbots
Chatbots involve using an LLM to have a conversation.
- [How to manage memory](/docs/how_to/chatbots_memory)
- [How to do retrieval](/docs/how_to/chatbots_retrieval)
- [How to use tools](/docs/how_to/chatbots_tools)
- [How to: manage memory](/docs/how_to/chatbots_memory)
- [How to: do retrieval](/docs/how_to/chatbots_retrieval)
- [How to: use tools](/docs/how_to/chatbots_tools)
### Query Analysis
### Query analysis
Query Analysis is the task of using an LLM to generate a query to send to a retriever.
- [How to add examples to the prompt](/docs/how_to/query_few_shot)
- [How to handle cases where no queries are generated](/docs/how_to/query_no_queries)
- [How to handle multiple queries](/docs/how_to/query_multiple_queries)
- [How to handle multiple retrievers](/docs/how_to/query_multiple_retrievers)
- [How to construct filters](/docs/how_to/query_constructing_filters)
- [How to deal with high cardinality categorical variables](/docs/how_to/query_high_cardinality)
- [How to: add examples to the prompt](/docs/how_to/query_few_shot)
- [How to: handle cases where no queries are generated](/docs/how_to/query_no_queries)
- [How to: handle multiple queries](/docs/how_to/query_multiple_queries)
- [How to: handle multiple retrievers](/docs/how_to/query_multiple_retrievers)
- [How to: construct filters](/docs/how_to/query_constructing_filters)
- [How to: deal with high cardinality categorical variables](/docs/how_to/query_high_cardinality)
### Q&A over SQL + CSV
You can use LLMs to do question answering over tabular data.
- [How to use prompting to improve results](/docs/how_to/sql_prompting)
- [How to do query validation](/docs/how_to/sql_query_checking)
- [How to deal with large databases](/docs/how_to/sql_large_db)
- [How to deal with CSV files](/docs/how_to/sql_csv)
- [How to: use prompting to improve results](/docs/how_to/sql_prompting)
- [How to: do query validation](/docs/how_to/sql_query_checking)
- [How to: deal with large databases](/docs/how_to/sql_large_db)
- [How to: deal with CSV files](/docs/how_to/sql_csv)
### Q&A over Graph Databases
### Q&A over graph databases
You can use an LLM to do question answering over graph databases.
- [How to map values to a database](/docs/how_to/graph_mapping)
- [How to add a semantic layer over the database](/docs/how_to/graph_semantic)
- [How to improve results with prompting](/docs/how_to/graph_prompting)
- [How to construct knowledge graphs](/docs/how_to/graph_constructing)
- [How to: map values to a database](/docs/how_to/graph_mapping)
- [How to: add a semantic layer over the database](/docs/how_to/graph_semantic)
- [How to: improve results with prompting](/docs/how_to/graph_prompting)
- [How to: construct knowledge graphs](/docs/how_to/graph_constructing)

View File

@@ -5,21 +5,20 @@
"id": "8c5eb99a",
"metadata": {},
"source": [
"# How to inspect your runnables\n",
"# How to inspect runnables\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"\n",
":::\n",
"\n",
"Once you create a runnable with [LangChain Expression Language](/docs/concepts/#langchain-expression-language), you may often want to inspect it to get a better sense for what is going on. This notebook covers some methods for doing so.\n",
"\n",
"This guide shows some ways you can programmatically introspect the internal steps of chains. If you are instead interested in debugging issues in your chain, see [this section](/docs/how_to/debugging) instead.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"`} />\n",
"```\n",
"\n",
"First, let's create an example chain. We will create one that does retrieval:"
]
},
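For example, a small chain can be introspected with `get_graph()` and `get_prompts()` roughly as follows; `ChatOpenAI` is just an illustrative model (assumes `langchain-openai` plus an API key), and ASCII drawing needs the `grandalf` package:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# A small illustrative chain.
prompt = ChatPromptTemplate.from_template("Tell me a joke about {topic}")
chain = prompt | ChatOpenAI() | StrOutputParser()

chain.get_graph().print_ascii()  # draw the chain's steps (requires `grandalf`)
print(chain.get_prompts())       # list the prompts used anywhere in the chain
```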
@@ -222,7 +221,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,676 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b8982428",
"metadata": {},
"source": [
"# Run LLMs locally\n",
"\n",
"## Use case\n",
"\n",
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), [Ollama](https://github.com/ollama/ollama), [GPT4All](https://github.com/nomic-ai/gpt4all), [llamafile](https://github.com/Mozilla-Ocho/llamafile), and others underscore the demand to run LLMs locally (on your own device).\n",
"\n",
"This has at least two important benefits:\n",
"\n",
"1. `Privacy`: Your data is not sent to a third party, and it is not subject to the terms of service of a commercial service\n",
"2. `Cost`: There is no inference fee, which is important for token-intensive applications (e.g., [long-running simulations](https://twitter.com/RLanceMartin/status/1691097659262820352?s=20), summarization)\n",
"\n",
"## Overview\n",
"\n",
"Running an LLM locally requires a few things:\n",
"\n",
"1. `Open-source LLM`: An open-source LLM that can be freely modified and shared \n",
"2. `Inference`: Ability to run this LLM on your device w/ acceptable latency\n",
"\n",
"### Open-source LLMs\n",
"\n",
"Users can now gain access to a rapidly growing set of [open-source LLMs](https://cameronrwolfe.substack.com/p/the-history-of-open-source-llms-better). \n",
"\n",
"These LLMs can be assessed across at least two dimensions (see figure):\n",
" \n",
"1. `Base model`: What is the base-model and how was it trained?\n",
"2. `Fine-tuning approach`: Was the base-model fine-tuned and, if so, what [set of instructions](https://cameronrwolfe.substack.com/p/beyond-llama-the-power-of-open-llms#%C2%A7alpaca-an-instruction-following-llama-model) was used?\n",
"\n",
"![Image description](../../static/img/OSS_LLM_overview.png)\n",
"\n",
"The relative performance of these models can be assessed using several leaderboards, including:\n",
"\n",
"1. [LmSys](https://chat.lmsys.org/?arena)\n",
"2. [GPT4All](https://gpt4all.io/index.html)\n",
"3. [HuggingFace](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard)\n",
"\n",
"### Inference\n",
"\n",
"A few frameworks for this have emerged to support inference of open-source LLMs on various devices:\n",
"\n",
"1. [`llama.cpp`](https://github.com/ggerganov/llama.cpp): C++ implementation of llama inference code with [weight optimization / quantization](https://finbarr.ca/how-is-llama-cpp-possible/)\n",
"2. [`gpt4all`](https://docs.gpt4all.io/index.html): Optimized C backend for inference\n",
"3. [`Ollama`](https://ollama.ai/): Bundles model weights and environment into an app that runs on device and serves the LLM\n",
"4. [`llamafile`](https://github.com/Mozilla-Ocho/llamafile): Bundles model weights and everything needed to run the model in a single file, allowing you to run the LLM locally from this file without any additional installation steps\n",
"\n",
"In general, these frameworks will do a few things:\n",
"\n",
"1. `Quantization`: Reduce the memory footprint of the raw model weights\n",
"2. `Efficient implementation for inference`: Support inference on consumer hardware (e.g., CPU or laptop GPU)\n",
"\n",
"In particular, see [this excellent post](https://finbarr.ca/how-is-llama-cpp-possible/) on the importance of quantization.\n",
"\n",
"![Image description](../../static/img/llama-memory-weights.png)\n",
"\n",
"With less precision, we radically decrease the memory needed to store the LLM in memory.\n",
"\n",
"In addition, we can see the importance of GPU memory bandwidth [sheet](https://docs.google.com/spreadsheets/d/1OehfHHNSn66BP2h3Bxp2NJTVX97icU0GmCXF6pK23H8/edit#gid=0)!\n",
"\n",
"A Mac M2 Max is 5-6x faster than a M1 for inference due to the larger GPU memory bandwidth.\n",
"\n",
"![Image description](../../static/img/llama_t_put.png)\n",
"\n",
"## Quickstart\n",
"\n",
"[`Ollama`](https://ollama.ai/) is one way to easily run inference on macOS.\n",
" \n",
"The instructions [here](https://github.com/jmorganca/ollama?tab=readme-ov-file#ollama) provide details, which we summarize:\n",
" \n",
"* [Download and run](https://ollama.ai/download) the app\n",
"* From command line, fetch a model from this [list of options](https://github.com/jmorganca/ollama): e.g., `ollama pull llama2`\n",
"* When the app is running, all models are automatically served on `localhost:11434`\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "86178adb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' The first man on the moon was Neil Armstrong, who landed on the moon on July 20, 1969 as part of the Apollo 11 mission. obviously.'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_community.llms import Ollama\n",
"\n",
"llm = Ollama(model=\"llama2\")\n",
"llm.invoke(\"The first man on the moon was ...\")"
]
},
{
"cell_type": "markdown",
"id": "343ab645",
"metadata": {},
"source": [
"Stream tokens as they are being generated."
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "9cd83603",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" The first man to walk on the moon was Neil Armstrong, an American astronaut who was part of the Apollo 11 mission in 1969. февруари 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon's surface, famously declaring \"That's one small step for man, one giant leap for mankind\" as he took his first steps. He was followed by fellow astronaut Edwin \"Buzz\" Aldrin, who also walked on the moon during the mission."
]
},
{
"data": {
"text/plain": [
"' The first man to walk on the moon was Neil Armstrong, an American astronaut who was part of the Apollo 11 mission in 1969. февруари 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon\\'s surface, famously declaring \"That\\'s one small step for man, one giant leap for mankind\" as he took his first steps. He was followed by fellow astronaut Edwin \"Buzz\" Aldrin, who also walked on the moon during the mission.'"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.callbacks.manager import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"\n",
"llm = Ollama(\n",
" model=\"llama2\", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])\n",
")\n",
"llm.invoke(\"The first man on the moon was ...\")"
]
},
{
"cell_type": "markdown",
"id": "5cb27414",
"metadata": {},
"source": [
"## Environment\n",
"\n",
"Inference speed is a challenge when running models locally (see above).\n",
"\n",
"To minimize latency, it is desirable to run models locally on GPU, which ships with many consumer laptops [e.g., Apple devices](https://www.apple.com/newsroom/2022/06/apple-unveils-m2-with-breakthrough-performance-and-capabilities/).\n",
"\n",
"And even with GPU, the available GPU memory bandwidth (as noted above) is important.\n",
"\n",
"### Running Apple silicon GPU\n",
"\n",
"`Ollama` and [`llamafile`](https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#gpu-support) will automatically utilize the GPU on Apple devices.\n",
" \n",
"Other frameworks require the user to set up the environment to utilize the Apple GPU.\n",
"\n",
"For example, `llama.cpp` python bindings can be configured to use the GPU via [Metal](https://developer.apple.com/metal/).\n",
"\n",
"Metal is a graphics and compute API created by Apple providing near-direct access to the GPU. \n",
"\n",
"See the [`llama.cpp`](docs/integrations/llms/llamacpp) setup [here](https://github.com/abetlen/llama-cpp-python/blob/main/docs/install/macos.md) to enable this.\n",
"\n",
"In particular, ensure that conda is using the correct virtual environment that you created (`miniforge3`).\n",
"\n",
"E.g., for me:\n",
"\n",
"```\n",
"conda activate /Users/rlm/miniforge3/envs/llama\n",
"```\n",
"\n",
"With the above confirmed, then:\n",
"\n",
"```\n",
"CMAKE_ARGS=\"-DLLAMA_METAL=on\" FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dir\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "c382e79a",
"metadata": {},
"source": [
"## LLMs\n",
"\n",
"There are various ways to gain access to quantized model weights.\n",
"\n",
"1. [`HuggingFace`](https://huggingface.co/TheBloke) - Many quantized model are available for download and can be run with framework such as [`llama.cpp`](https://github.com/ggerganov/llama.cpp). You can also download models in [`llamafile` format](https://huggingface.co/models?other=llamafile) from HuggingFace.\n",
"2. [`gpt4all`](https://gpt4all.io/index.html) - The model explorer offers a leaderboard of metrics and associated quantized models available for download \n",
"3. [`Ollama`](https://github.com/jmorganca/ollama) - Several models can be accessed directly via `pull`\n",
"\n",
"### Ollama\n",
"\n",
"With [Ollama](https://github.com/jmorganca/ollama), fetch a model via `ollama pull <model family>:<tag>`:\n",
"\n",
"* E.g., for Llama-7b: `ollama pull llama2` will download the most basic version of the model (e.g., smallest # parameters and 4 bit quantization)\n",
"* We can also specify a particular version from the [model list](https://github.com/jmorganca/ollama?tab=readme-ov-file#model-library), e.g., `ollama pull llama2:13b`\n",
"* See the full set of parameters on the [API reference page](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.ollama.Ollama.html)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "8ecd2f78",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' Sure! Here\\'s the answer, broken down step by step:\\n\\nThe first man on the moon was... Neil Armstrong.\\n\\nHere\\'s how I arrived at that answer:\\n\\n1. The first manned mission to land on the moon was Apollo 11.\\n2. The mission included three astronauts: Neil Armstrong, Edwin \"Buzz\" Aldrin, and Michael Collins.\\n3. Neil Armstrong was the mission commander and the first person to set foot on the moon.\\n4. On July 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon\\'s surface, famously declaring \"That\\'s one small step for man, one giant leap for mankind.\"\\n\\nSo, the first man on the moon was Neil Armstrong!'"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_community.llms import Ollama\n",
"\n",
"llm = Ollama(model=\"llama2:13b\")\n",
"llm.invoke(\"The first man on the moon was ... think step by step\")"
]
},
{
"cell_type": "markdown",
"id": "07c8c0d1",
"metadata": {},
"source": [
"### Llama.cpp\n",
"\n",
"Llama.cpp is compatible with a [broad set of models](https://github.com/ggerganov/llama.cpp).\n",
"\n",
"For example, below we run inference on `llama2-13b` with 4 bit quantization downloaded from [HuggingFace](https://huggingface.co/TheBloke/Llama-2-13B-GGML/tree/main).\n",
"\n",
"As noted above, see the [API reference](https://api.python.langchain.com/en/latest/llms/langchain.llms.llamacpp.LlamaCpp.html?highlight=llamacpp#langchain.llms.llamacpp.LlamaCpp) for the full set of parameters. \n",
"\n",
"From the [llama.cpp API reference docs](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.llamacpp.LlamaCpp.htm), a few are worth commenting on:\n",
"\n",
"`n_gpu_layers`: number of layers to be loaded into GPU memory\n",
"\n",
"* Value: 1\n",
"* Meaning: Only one layer of the model will be loaded into GPU memory (1 is often sufficient).\n",
"\n",
"`n_batch`: number of tokens the model should process in parallel \n",
"\n",
"* Value: n_batch\n",
"* Meaning: It's recommended to choose a value between 1 and n_ctx (which in this case is set to 2048)\n",
"\n",
"`n_ctx`: Token context window\n",
"\n",
"* Value: 2048\n",
"* Meaning: The model will consider a window of 2048 tokens at a time\n",
"\n",
"`f16_kv`: whether the model should use half-precision for the key/value cache\n",
"\n",
"* Value: True\n",
"* Meaning: The model will use half-precision, which can be more memory efficient; Metal only supports True."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5eba38dc",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"%env CMAKE_ARGS=\"-DLLAMA_METAL=on\"\n",
"%env FORCE_CMAKE=1\n",
"%pip install --upgrade --quiet llama-cpp-python --no-cache-dirclear"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a88bf0c8-e989-4bcd-bcb7-4d7757e684f2",
"metadata": {},
"outputs": [],
"source": [
"from langchain.callbacks.manager import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain_community.llms import LlamaCpp\n",
"\n",
"llm = LlamaCpp(\n",
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
" n_gpu_layers=1,\n",
" n_batch=512,\n",
" n_ctx=2048,\n",
" f16_kv=True,\n",
" callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "f56f5168",
"metadata": {},
"source": [
"The console log will show the below to indicate Metal was enabled properly from steps above:\n",
"```\n",
"ggml_metal_init: allocating\n",
"ggml_metal_init: using MPS\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "7890a077",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Llama.generate: prefix-match hit\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" and use logical reasoning to figure out who the first man on the moon was.\n",
"\n",
"Here are some clues:\n",
"\n",
"1. The first man on the moon was an American.\n",
"2. He was part of the Apollo 11 mission.\n",
"3. He stepped out of the lunar module and became the first person to set foot on the moon's surface.\n",
"4. His last name is Armstrong.\n",
"\n",
"Now, let's use our reasoning skills to figure out who the first man on the moon was. Based on clue #1, we know that the first man on the moon was an American. Clue #2 tells us that he was part of the Apollo 11 mission. Clue #3 reveals that he was the first person to set foot on the moon's surface. And finally, clue #4 gives us his last name: Armstrong.\n",
"Therefore, the first man on the moon was Neil Armstrong!"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 9623.21 ms\n",
"llama_print_timings: sample time = 143.77 ms / 203 runs ( 0.71 ms per token, 1412.01 tokens per second)\n",
"llama_print_timings: prompt eval time = 485.94 ms / 7 tokens ( 69.42 ms per token, 14.40 tokens per second)\n",
"llama_print_timings: eval time = 6385.16 ms / 202 runs ( 31.61 ms per token, 31.64 tokens per second)\n",
"llama_print_timings: total time = 7279.28 ms\n"
]
},
{
"data": {
"text/plain": [
"\" and use logical reasoning to figure out who the first man on the moon was.\\n\\nHere are some clues:\\n\\n1. The first man on the moon was an American.\\n2. He was part of the Apollo 11 mission.\\n3. He stepped out of the lunar module and became the first person to set foot on the moon's surface.\\n4. His last name is Armstrong.\\n\\nNow, let's use our reasoning skills to figure out who the first man on the moon was. Based on clue #1, we know that the first man on the moon was an American. Clue #2 tells us that he was part of the Apollo 11 mission. Clue #3 reveals that he was the first person to set foot on the moon's surface. And finally, clue #4 gives us his last name: Armstrong.\\nTherefore, the first man on the moon was Neil Armstrong!\""
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm.invoke(\"The first man on the moon was ... Let's think step by step\")"
]
},
{
"cell_type": "markdown",
"id": "831ddf7c",
"metadata": {},
"source": [
"### GPT4All\n",
"\n",
"We can use model weights downloaded from [GPT4All](/docs/integrations/llms/gpt4all) model explorer.\n",
"\n",
"Similar to what is shown above, we can run inference and use [the API reference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.gpt4all.GPT4All.html) to set parameters of interest."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e27baf6e",
"metadata": {},
"outputs": [],
"source": [
"%pip install gpt4all"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "915ecd4c-8f6b-4de3-a787-b64cb7c682b4",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.llms import GPT4All\n",
"\n",
"llm = GPT4All(\n",
" model=\"/Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "e3d4526f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\".\\n1) The United States decides to send a manned mission to the moon.2) They choose their best astronauts and train them for this specific mission.3) They build a spacecraft that can take humans to the moon, called the Lunar Module (LM).4) They also create a larger spacecraft, called the Saturn V rocket, which will launch both the LM and the Command Service Module (CSM), which will carry the astronauts into orbit.5) The mission is planned down to the smallest detail: from the trajectory of the rockets to the exact movements of the astronauts during their moon landing.6) On July 16, 1969, the Saturn V rocket launches from Kennedy Space Center in Florida, carrying the Apollo 11 mission crew into space.7) After one and a half orbits around the Earth, the LM separates from the CSM and begins its descent to the moon's surface.8) On July 20, 1969, at 2:56 pm EDT (GMT-4), Neil Armstrong becomes the first man on the moon. He speaks these\""
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm.invoke(\"The first man on the moon was ... Let's think step by step\")"
]
},
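{
"cell_type": "markdown",
"id": "gpt4all-params-note",
"metadata": {},
"source": [
"As a minimal sketch (not executed here), sampling parameters such as the temperature can also be passed when constructing `GPT4All`. The exact field names used below (`temp`, `top_p`) are assumptions; verify them against [the API reference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.gpt4all.GPT4All.html) linked above."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "gpt4all-params-example",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.llms import GPT4All\n",
"\n",
"# Hypothetical sketch: the parameter names (temp, top_p) are assumptions;\n",
"# check the GPT4All API reference for the fields your version supports.\n",
"llm = GPT4All(\n",
"    model=\"/Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\",\n",
"    temp=0.3,\n",
"    top_p=0.95,\n",
")\n",
"\n",
"llm.invoke(\"The first man on the moon was ... Let's think step by step\")"
]
},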
{
"cell_type": "markdown",
"id": "056854e2-5e4b-4a03-be7e-03192e5c4e1e",
"metadata": {},
"source": [
"### llamafile\n",
"\n",
"One of the simplest ways to run an LLM locally is using a [llamafile](https://github.com/Mozilla-Ocho/llamafile). All you need to do is:\n",
"\n",
"1) Download a llamafile from [HuggingFace](https://huggingface.co/models?other=llamafile)\n",
"2) Make the file executable\n",
"3) Run the file\n",
"\n",
"llamafiles bundle model weights and a [specially-compiled](https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#technical-details) version of [`llama.cpp`](https://github.com/ggerganov/llama.cpp) into a single file that can run on most computers any additional dependencies. They also come with an embedded inference server that provides an [API](https://github.com/Mozilla-Ocho/llamafile/blob/main/llama.cpp/server/README.md#api-endpoints) for interacting with your model. \n",
"\n",
"Here's a simple bash script that shows all 3 setup steps:\n",
"\n",
"```bash\n",
"# Download a llamafile from HuggingFace\n",
"wget https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Make the file executable. On Windows, instead just rename the file to end in \".exe\".\n",
"chmod +x TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Start the model server. Listens at http://localhost:8080 by default.\n",
"./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile --server --nobrowser\n",
"```\n",
"\n",
"After you run the above setup steps, you can use LangChain to interact with your model:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "002e655c-ba18-4db3-ac7b-f33e825d14b6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"\\nFirstly, let's imagine the scene where Neil Armstrong stepped onto the moon. This happened in 1969. The first man on the moon was Neil Armstrong. We already know that.\\n2nd, let's take a step back. Neil Armstrong didn't have any special powers. He had to land his spacecraft safely on the moon without injuring anyone or causing any damage. If he failed to do this, he would have been killed along with all those people who were on board the spacecraft.\\n3rd, let's imagine that Neil Armstrong successfully landed his spacecraft on the moon and made it back to Earth safely. The next step was for him to be hailed as a hero by his people back home. It took years before Neil Armstrong became an American hero.\\n4th, let's take another step back. Let's imagine that Neil Armstrong wasn't hailed as a hero, and instead, he was just forgotten. This happened in the 1970s. Neil Armstrong wasn't recognized for his remarkable achievement on the moon until after he died.\\n5th, let's take another step back. Let's imagine that Neil Armstrong didn't die in the 1970s and instead, lived to be a hundred years old. This happened in 2036. In the year 2036, Neil Armstrong would have been a centenarian.\\nNow, let's think about the present. Neil Armstrong is still alive. He turned 95 years old on July 20th, 2018. If he were to die now, his achievement of becoming the first human being to set foot on the moon would remain an unforgettable moment in history.\\nI hope this helps you understand the significance and importance of Neil Armstrong's achievement on the moon!\""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_community.llms.llamafile import Llamafile\n",
"\n",
"llm = Llamafile()\n",
"\n",
"llm.invoke(\"The first man on the moon was ... Let's think step by step.\")"
]
},
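{
"cell_type": "markdown",
"id": "llamafile-streaming-note",
"metadata": {},
"source": [
"Because `Llamafile` implements the standard Runnable interface, you can also stream tokens as they are generated. The snippet below is a minimal sketch that assumes the llamafile server started in the setup steps above is still running."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "llamafile-streaming-example",
"metadata": {},
"outputs": [],
"source": [
"# Minimal streaming sketch; assumes the llamafile server above is listening on localhost:8080.\n",
"for chunk in llm.stream(\"Describe the first moon landing in one sentence.\"):\n",
"    print(chunk, end=\"\", flush=True)"
]
},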
{
"cell_type": "markdown",
"id": "6b84e543",
"metadata": {},
"source": [
"## Prompts\n",
"\n",
"Some LLMs will benefit from specific prompts.\n",
"\n",
"For example, LLaMA will use [special tokens](https://twitter.com/RLanceMartin/status/1681879318493003776?s=20).\n",
"\n",
"We can use `ConditionalPromptSelector` to set prompt based on the model type."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16759b7c-7903-4269-b7b4-f83b313d8091",
"metadata": {},
"outputs": [],
"source": [
"# Set our LLM\n",
"llm = LlamaCpp(\n",
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
" n_gpu_layers=1,\n",
" n_batch=512,\n",
" n_ctx=2048,\n",
" f16_kv=True,\n",
" callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "66656084",
"metadata": {},
"source": [
"Set the associated prompt based upon the model version."
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "8555f5bf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PromptTemplate(input_variables=['question'], output_parser=None, partial_variables={}, template='<<SYS>> \\n You are an assistant tasked with improving Google search results. \\n <</SYS>> \\n\\n [INST] Generate THREE Google search queries that are similar to this question. The output should be a numbered list of questions and each should have a question mark at the end: \\n\\n {question} [/INST]', template_format='f-string', validate_template=True)"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.chains import LLMChain\n",
"from langchain.chains.prompt_selector import ConditionalPromptSelector\n",
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"DEFAULT_LLAMA_SEARCH_PROMPT = PromptTemplate(\n",
" input_variables=[\"question\"],\n",
" template=\"\"\"<<SYS>> \\n You are an assistant tasked with improving Google search \\\n",
"results. \\n <</SYS>> \\n\\n [INST] Generate THREE Google search queries that \\\n",
"are similar to this question. The output should be a numbered list of questions \\\n",
"and each should have a question mark at the end: \\n\\n {question} [/INST]\"\"\",\n",
")\n",
"\n",
"DEFAULT_SEARCH_PROMPT = PromptTemplate(\n",
" input_variables=[\"question\"],\n",
" template=\"\"\"You are an assistant tasked with improving Google search \\\n",
"results. Generate THREE Google search queries that are similar to \\\n",
"this question. The output should be a numbered list of questions and each \\\n",
"should have a question mark at the end: {question}\"\"\",\n",
")\n",
"\n",
"QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector(\n",
" default_prompt=DEFAULT_SEARCH_PROMPT,\n",
" conditionals=[(lambda llm: isinstance(llm, LlamaCpp), DEFAULT_LLAMA_SEARCH_PROMPT)],\n",
")\n",
"\n",
"prompt = QUESTION_PROMPT_SELECTOR.get_prompt(llm)\n",
"prompt"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "d0aedfd2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Sure! Here are three similar search queries with a question mark at the end:\n",
"\n",
"1. Which NBA team did LeBron James lead to a championship in the year he was drafted?\n",
"2. Who won the Grammy Awards for Best New Artist and Best Female Pop Vocal Performance in the same year that Lady Gaga was born?\n",
"3. What MLB team did Babe Ruth play for when he hit 60 home runs in a single season?"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 14943.19 ms\n",
"llama_print_timings: sample time = 72.93 ms / 101 runs ( 0.72 ms per token, 1384.87 tokens per second)\n",
"llama_print_timings: prompt eval time = 14942.95 ms / 93 tokens ( 160.68 ms per token, 6.22 tokens per second)\n",
"llama_print_timings: eval time = 3430.85 ms / 100 runs ( 34.31 ms per token, 29.15 tokens per second)\n",
"llama_print_timings: total time = 18578.26 ms\n"
]
},
{
"data": {
"text/plain": [
"' Sure! Here are three similar search queries with a question mark at the end:\\n\\n1. Which NBA team did LeBron James lead to a championship in the year he was drafted?\\n2. Who won the Grammy Awards for Best New Artist and Best Female Pop Vocal Performance in the same year that Lady Gaga was born?\\n3. What MLB team did Babe Ruth play for when he hit 60 home runs in a single season?'"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Chain\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"question = \"What NFL team won the Super Bowl in the year that Justin Bieber was born?\"\n",
"llm_chain.run({\"question\": question})"
]
},
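{
"cell_type": "markdown",
"id": "lcel-local-llm-note",
"metadata": {},
"source": [
"The same prompt and local model can also be composed with the LCEL pipe syntax instead of `LLMChain`. This is a minimal equivalent sketch using the `prompt` and `question` defined above."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "lcel-local-llm-example",
"metadata": {},
"outputs": [],
"source": [
"# Minimal LCEL sketch equivalent to the LLMChain above.\n",
"chain = prompt | llm\n",
"chain.invoke({\"question\": question})"
]
},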
{
"cell_type": "markdown",
"id": "6e0d37e7-f1d9-4848-bf2c-c22392ee141f",
"metadata": {},
"source": [
"We also can use the LangChain Prompt Hub to fetch and / or store prompts that are model specific.\n",
"\n",
"This will work with your [LangSmith API key](https://docs.smith.langchain.com/).\n",
"\n",
"For example, [here](https://smith.langchain.com/hub/rlm/rag-prompt-llama) is a prompt for RAG with LLaMA-specific tokens."
]
},
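{
"cell_type": "markdown",
"id": "prompt-hub-pull-note",
"metadata": {},
"source": [
"As a sketch, that prompt can be pulled programmatically with the `hub` client. This assumes the `langchainhub` package is installed and a LangSmith API key is configured."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "prompt-hub-pull-example",
"metadata": {},
"outputs": [],
"source": [
"from langchain import hub\n",
"\n",
"# Assumes `langchainhub` is installed and LANGCHAIN_API_KEY is set.\n",
"rag_prompt_llama = hub.pull(\"rlm/rag-prompt-llama\")\n",
"rag_prompt_llama"
]
},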
{
"cell_type": "markdown",
"id": "6ba66260",
"metadata": {},
"source": [
"## Use cases\n",
"\n",
"Given an `llm` created from one of the models above, you can use it for [many use cases](/docs/how_to#use-cases).\n",
"\n",
"For example, here is a guide to [RAG](/docs/tutorials/local_rag) with local LLMs.\n",
"\n",
"In general, use cases for local LLMs can be driven by at least two factors:\n",
"\n",
"* `Privacy`: private data (e.g., journals, etc) that a user does not want to share \n",
"* `Cost`: text preprocessing (extraction/tagging), summarization, and agent simulations are token-use-intensive tasks\n",
"\n",
"In addition, [here](https://blog.langchain.dev/using-langsmith-to-support-fine-tuning-of-open-source-llms/) is an overview on fine-tuning, which can utilize open-source LLMs."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -5,17 +5,16 @@
"id": "78b45321-7740-4399-b2ad-459811131de3",
"metadata": {},
"source": [
"# How to get log probabilities from model calls\n",
"# How to get log probabilities\n",
"\n",
"Certain chat models can be configured to return token-level log probabilities representing the likelihood of a given token. This guide walks through how to get this information in LangChain.\n",
":::info Prerequisites\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"`} />\n",
"```"
"\n",
":::\n",
"\n",
"Certain chat models can be configured to return token-level log probabilities representing the likelihood of a given token. This guide walks through how to get this information in LangChain."
]
},
{
@@ -170,7 +169,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -21,7 +21,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet sentence-transformers langchain-chroma langchain langchain-openai > /dev/null"
"%pip install --upgrade --quiet sentence-transformers langchain-chroma langchain langchain-openai langchain-huggingface > /dev/null"
]
},
{
@@ -57,7 +57,7 @@
"from langchain_community.document_transformers import (\n",
" LongContextReorder,\n",
")\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_openai import OpenAI\n",
"\n",
"# Get embeddings.\n",

View File

@@ -7,6 +7,17 @@
"source": [
"# How to add message history\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"- [Configuring chain parameters at runtime](/docs/how_to/configure)\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Chat Messages](/docs/concepts/#message-types)\n",
"\n",
":::\n",
"\n",
"Passing conversation state into and out a chain is vital when building a chatbot. The [`RunnableWithMessageHistory`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory) class lets us add message history to certain types of chains. It wraps another Runnable and manages the chat message history for it.\n",
"\n",
"Specifically, it can be used for any Runnable that takes as input one of:\n",
@@ -21,18 +32,6 @@
"* a sequence of `BaseMessage`\n",
"* a dict with a key that contains a sequence of `BaseMessage`\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"- [Configuring chain parameters at runtime](/docs/how_to/configure)\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Chat Messages](/docs/concepts/#message-types)\n",
"`} />\n",
"```\n",
"\n",
"Let's take a look at some examples to see how it works. First we construct a runnable (which here accepts a dict as input and returns a message as output):\n",
"\n",
"```{=mdx}\n",
@@ -667,7 +666,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -8,8 +8,23 @@
"# How to migrate from legacy LangChain agents to LangGraph\n",
"\n",
"Here we focus on how to move from legacy LangChain agents to LangGraph agents.\n",
"LangChain agents (the AgentExecutor in particular) have multiple configuration parameters.\n",
"In this notebook we will show how those parameters map to the LangGraph `chat_agent_executor`."
"LangChain agents (the [AgentExecutor](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor) in particular) have multiple configuration parameters.\n",
"In this notebook we will show how those parameters map to the LangGraph [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent).\n",
"\n",
"#### Prerequisites\n",
"\n",
"This how-to guide uses OpenAI as the LLM. Install the dependencies to run."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "662fac50",
"metadata": {},
"outputs": [],
"source": [
"%%capture --no-stderr\n",
"%pip install -U langchain-openai langchain langgraph"
]
},
{
@@ -24,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 6,
"id": "1e425fea-2796-4b99-bee6-9a6ffe73f756",
"metadata": {},
"outputs": [],
@@ -32,7 +47,7 @@
"from langchain_core.tools import tool\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"model = ChatOpenAI()\n",
"model = ChatOpenAI(model=\"gpt-4o\")\n",
"\n",
"\n",
"@tool\n",
@@ -52,12 +67,12 @@
"id": "af002033-fe51-4d14-b47c-3e9b483c8395",
"metadata": {},
"source": [
"For AgentExecutor, we define a prompt with a placeholder for the agent's scratchpad. The agent can be invoked as follows:"
"For the LangChain [AgentExecutor](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor), we define a prompt with a placeholder for the agent's scratchpad. The agent can be invoked as follows:"
]
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 15,
"id": "03ea357c-9c36-4464-b2cc-27bd150e1554",
"metadata": {},
"outputs": [
@@ -68,20 +83,21 @@
" 'output': 'The value of `magic_function(3)` is 5.'}"
]
},
"execution_count": 21,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.agents import AgentExecutor, create_tool_calling_agent\n",
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You are a helpful assistant\"),\n",
" (\"human\", \"{input}\"),\n",
" MessagesPlaceholder(\"agent_scratchpad\"),\n",
" # Placeholders fill up a **list** of messages\n",
" (\"placeholder\", \"{agent_scratchpad}\"),\n",
" ]\n",
")\n",
"\n",
@@ -97,13 +113,13 @@
"id": "94205f3b-fd2b-4fd7-af69-0a3fc313dc88",
"metadata": {},
"source": [
"LangGraph's `chat_agent_executor` manages a state that is defined by a list of messages. It will continue to process the list until there are no tool calls in the agent's output. To kick it off, we input a list of messages. The output will contain the entire state of the graph-- in this case, the conversation history.\n",
"LangGraph's [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) manages a state that is defined by a list of messages. It will continue to process the list until there are no tool calls in the agent's output. To kick it off, we input a list of messages. The output will contain the entire state of the graph-- in this case, the conversation history.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 16,
"id": "53a3737a-d167-4255-89bf-20ac37f89a3e",
"metadata": {},
"outputs": [
@@ -111,18 +127,18 @@
"data": {
"text/plain": [
"{'input': 'what is the value of magic_function(3)?',\n",
" 'output': 'The value of the magic function with input 3 is 5.'}"
" 'output': 'The value of `magic_function(3)` is 5.'}"
]
},
"execution_count": 22,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langgraph.prebuilt import chat_agent_executor\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"app = chat_agent_executor.create_tool_calling_executor(model, tools)\n",
"app = create_react_agent(model, tools)\n",
"\n",
"\n",
"messages = app.invoke({\"messages\": [(\"human\", query)]})\n",
@@ -134,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 17,
"id": "74ecebe3-512e-409c-a661-bdd5b0a2b782",
"metadata": {},
"outputs": [
@@ -142,10 +158,10 @@
"data": {
"text/plain": [
"{'input': 'Pardon?',\n",
" 'output': 'The value of the magic function with input 3 is 5.'}"
" 'output': 'The result of applying the `magic_function` to the input `3` is `5`.'}"
]
},
"execution_count": 23,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -171,7 +187,7 @@
"\n",
"With legacy LangChain agents you have to pass in a prompt template. You can use this to control the agent.\n",
"\n",
"With LangGraph `chat_agent_executor`, by default there is no prompt. You can achieve similar control over the agent in a few ways:\n",
"With LangGraph [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent), by default there is no prompt. You can achieve similar control over the agent in a few ways:\n",
"\n",
"1. Pass in a system message as input\n",
"2. Initialize the agent with a system message\n",
@@ -184,7 +200,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 18,
"id": "a9a11ccd-75e2-4c11-844d-a34870b0ff91",
"metadata": {},
"outputs": [
@@ -195,7 +211,7 @@
" 'output': 'El valor de `magic_function(3)` es 5.'}"
]
},
"execution_count": 24,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -205,7 +221,8 @@
" [\n",
" (\"system\", \"You are a helpful assistant. Respond only in Spanish.\"),\n",
" (\"human\", \"{input}\"),\n",
" MessagesPlaceholder(\"agent_scratchpad\"),\n",
" # Placeholders fill up a **list** of messages\n",
" (\"placeholder\", \"{agent_scratchpad}\"),\n",
" ]\n",
")\n",
"\n",
@@ -221,44 +238,27 @@
"id": "bd5f5500-5ae4-4000-a9fd-8c5a2cc6404d",
"metadata": {},
"source": [
"Now, let's pass a custom system message to `chat_agent_executor`. This can either be a string or a LangChain SystemMessage."
"Now, let's pass a custom system message to [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent). This can either be a string or a LangChain SystemMessage."
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 14,
"id": "a9486805-676a-4d19-a5c4-08b41b172989",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'input': 'what is the value of magic_function(3)?',\n",
" 'output': 'El valor de magic_function(3) es 5.'}"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"from langchain_core.messages import SystemMessage\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"system_message = \"Respond only in Spanish\"\n",
"system_message = \"You are a helpful assistant. Respond only in Spanish.\"\n",
"# This could also be a SystemMessage object\n",
"# system_message = SystemMessage(content=\"Respond only in Spanish\")\n",
"# system_message = SystemMessage(content=\"You are a helpful assistant. Respond only in Spanish.\")\n",
"\n",
"app = chat_agent_executor.create_tool_calling_executor(\n",
" model, tools, messages_modifier=system_message\n",
")\n",
"app = create_react_agent(model, tools, messages_modifier=system_message)\n",
"\n",
"\n",
"messages = app.invoke({\"messages\": [(\"human\", query)]})\n",
"{\n",
" \"input\": query,\n",
" \"output\": messages[\"messages\"][-1].content,\n",
"}"
"messages = app.invoke({\"messages\": [(\"user\", query)]})"
]
},
{
@@ -272,7 +272,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 21,
"id": "d369ab45-0c82-45f4-9d3e-8efb8dd47e2c",
"metadata": {},
"outputs": [
@@ -280,24 +280,35 @@
"data": {
"text/plain": [
"{'input': 'what is the value of magic_function(3)?',\n",
" 'output': 'El valor de magic_function(3) es 5.'}"
" 'output': 'El valor de magic_function(3) es 5. ¡Pandamonium!'}"
]
},
"execution_count": 27,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def _modify_messages(messages):\n",
" return [SystemMessage(content=\"Respond only in spanish\")] + messages\n",
"from langchain_core.messages import AnyMessage\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"\n",
"app = chat_agent_executor.create_tool_calling_executor(\n",
" model, tools, messages_modifier=_modify_messages\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You are a helpful assistant. Respond only in Spanish.\"),\n",
" (\"placeholder\", \"{messages}\"),\n",
" ]\n",
")\n",
"\n",
"\n",
"def _modify_messages(messages: list[AnyMessage]):\n",
" return prompt.invoke({\"messages\": messages}).to_messages() + [\n",
" (\"user\", \"Also say 'Pandamonium!' after the answer.\")\n",
" ]\n",
"\n",
"\n",
"app = create_react_agent(model, tools, messages_modifier=_modify_messages)\n",
"\n",
"\n",
"messages = app.invoke({\"messages\": [(\"human\", query)]})\n",
"{\n",
" \"input\": query,\n",
@@ -317,7 +328,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 22,
"id": "4eff44bc-a620-4c8a-97b1-268692a842bb",
"metadata": {},
"outputs": [
@@ -325,7 +336,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_qckwqZI7p2LGYhMnQI5r6qsL', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls'}, id='run-0602a2dd-c4d9-4050-b851-3e2b838c6773', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_qckwqZI7p2LGYhMnQI5r6qsL'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_qckwqZI7p2LGYhMnQI5r6qsL', 'index': 0}])], tool_call_id='call_qckwqZI7p2LGYhMnQI5r6qsL'), 5)]\n"
"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_lIjE9voYOCFAVoUXSDPQ5bFI', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls'}, id='run-7a23003a-ab50-4d7c-b14b-86129d1cacfe', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_lIjE9voYOCFAVoUXSDPQ5bFI'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_lIjE9voYOCFAVoUXSDPQ5bFI', 'index': 0}])], tool_call_id='call_lIjE9voYOCFAVoUXSDPQ5bFI'), 5)]\n"
]
}
],
@@ -340,34 +351,33 @@
"id": "594f7567-302f-4fa8-85bb-025ac8322162",
"metadata": {},
"source": [
"By default the `chat_agent_executor` in LangGraph appends all messages to the central state. Therefore, it is easy to see any intermediate steps by just looking at the full state."
"By default the [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) in LangGraph appends all messages to the central state. Therefore, it is easy to see any intermediate steps by just looking at the full state."
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 23,
"id": "4f4364ea-dffe-4d25-bdce-ef7d0020b880",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='408451ee-d65b-498b-abf1-788aaadfbeff'),\n",
" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_eF7WussX7KgpGdoJFj6cWTxR', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 65, 'total_tokens': 79}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a07e5d11-9319-4e27-85fb-253b75c5d7c3-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_eF7WussX7KgpGdoJFj6cWTxR'}]),\n",
" ToolMessage(content='5', name='magic_function', id='35045a27-a301-474b-b321-5f93da671fb1', tool_call_id='call_eF7WussX7KgpGdoJFj6cWTxR'),\n",
" AIMessage(content='The value of magic_function(3) is 5.', response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 88, 'total_tokens': 101}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'stop', 'logprobs': None}, id='run-18a36a26-2477-4fc6-be51-7a675a6e10e8-0')]}"
"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='8c252eb2-9496-4ad0-b3ae-9ecb2f6c406e'),\n",
" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_xmBLOw2pRqB1aRTTiwqEEftW', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 64, 'total_tokens': 78}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_729ea513f7', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-2393b69c-7c52-4771-8bec-aca0e097fcc1-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_xmBLOw2pRqB1aRTTiwqEEftW'}]),\n",
" ToolMessage(content='5', name='magic_function', id='bec0d0f9-bbaf-49fb-b0cb-46a658658f87', tool_call_id='call_xmBLOw2pRqB1aRTTiwqEEftW'),\n",
" AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 87, 'total_tokens': 101}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_729ea513f7', 'finish_reason': 'stop', 'logprobs': None}, id='run-5904d36f-b2a4-4f55-b431-12c82992c92c-0')]}"
]
},
"execution_count": 6,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langgraph.prebuilt import chat_agent_executor\n",
"\n",
"app = chat_agent_executor.create_tool_calling_executor(model, tools)\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"app = create_react_agent(model, tools=tools)\n",
"\n",
"messages = app.invoke({\"messages\": [(\"human\", query)]})\n",
"\n",
@@ -390,7 +400,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 24,
"id": "16f189a7-fc78-4cb5-aa16-a94ca06401a6",
"metadata": {},
"outputs": [],
@@ -406,7 +416,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 26,
"id": "c96aefd7-6f6e-4670-aca6-1ac3d4e7871f",
"metadata": {},
"outputs": [
@@ -421,15 +431,7 @@
"Invoking: `magic_function` with `{'input': '3'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
"Invoking: `magic_function` with `{'input': '3'}`\n",
"responded: I encountered an error while trying to determine the value of the magic function for the input \"3\". Let me try again.\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
"Invoking: `magic_function` with `{'input': '3'}`\n",
"responded: I apologize for the inconvenience. It seems there is still an error in calculating the value of the magic function for the input \"3\". Let me attempt to resolve the issue by trying a different approach.\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3m\u001b[0m\n",
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3mParece que hubo un error al intentar obtener el valor de `magic_function(3)`. ¿Te gustaría que lo intente de nuevo?\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@@ -438,15 +440,24 @@
"data": {
"text/plain": [
"{'input': 'what is the value of magic_function(3)?',\n",
" 'output': 'Agent stopped due to max iterations.'}"
" 'output': 'Parece que hubo un error al intentar obtener el valor de `magic_function(3)`. ¿Te gustaría que lo intente de nuevo?'}"
]
},
"execution_count": 8,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You are a helpful assistant. Respond only in Spanish.\"),\n",
" (\"human\", \"{input}\"),\n",
" # Placeholders fill up a **list** of messages\n",
" (\"placeholder\", \"{agent_scratchpad}\"),\n",
" ]\n",
")\n",
"\n",
"agent = create_tool_calling_agent(model, tools, prompt)\n",
"agent_executor = AgentExecutor(\n",
" agent=agent,\n",
@@ -460,7 +471,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 29,
"id": "b974a91f-6ae8-4644-83d9-73666258a6db",
"metadata": {},
"outputs": [
@@ -468,35 +479,33 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_VkrswGIkIUKJQyVF0AvMaU3p', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 65, 'total_tokens': 79}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-2dd5504b-9386-4b35-aed1-a2a267f883fd-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_VkrswGIkIUKJQyVF0AvMaU3p'}])]}}\n",
"------\n",
"{'action': {'messages': [ToolMessage(content='Sorry, there was an error. Please try again.', name='magic_function', id='85d7e845-f4ef-40a6-828d-c48c93b02b97', tool_call_id='call_VkrswGIkIUKJQyVF0AvMaU3p')]}}\n",
"------\n",
"{'agent': {'messages': [AIMessage(content='It seems there was an error when trying to calculate the value of the magic function for the input 3. Let me try again.', additional_kwargs={'tool_calls': [{'id': 'call_i5ZWsDhQvzgKs2bCroMB4JSL', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 42, 'prompt_tokens': 98, 'total_tokens': 140}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-6224c33b-0d3a-4925-9050-cb2a844dfe62-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_i5ZWsDhQvzgKs2bCroMB4JSL'}])]}}\n",
"------\n",
"{'action': {'messages': [ToolMessage(content='Sorry, there was an error. Please try again.', name='magic_function', id='f846363c-b143-402c-949d-40d84b19d979', tool_call_id='call_i5ZWsDhQvzgKs2bCroMB4JSL')]}}\n",
"------\n",
"{'agent': {'messages': [AIMessage(content='Unfortunately, there seems to be an issue with calculating the value of the magic function for the input 3. Let me attempt to resolve this issue by using a different approach.', additional_kwargs={'tool_calls': [{'id': 'call_I26nZWbe4iVnagUh4GVePwig', 'function': {'arguments': '{\"input\": \"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 65, 'prompt_tokens': 162, 'total_tokens': 227}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-0512509d-201e-4fbb-ac96-fdd68400810a-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_I26nZWbe4iVnagUh4GVePwig'}])]}}\n",
"------\n",
"{'action': {'messages': [ToolMessage(content='Sorry, there was an error. Please try again.', name='magic_function', id='fb19299f-de26-4659-9507-4bf4fb53bff4', tool_call_id='call_I26nZWbe4iVnagUh4GVePwig')]}}\n",
"------\n",
"('human', 'what is the value of magic_function(3)?')\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_9fMkSAUGRa2BsADwF32ct1m1', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 64, 'total_tokens': 78}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_729ea513f7', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-79084bff-6e10-49bb-b7f0-f613ebcc68ac-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_9fMkSAUGRa2BsADwF32ct1m1'}]\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='06f997fd-5309-4d56-afa3-2fe8cbf0d04f' tool_call_id='call_9fMkSAUGRa2BsADwF32ct1m1'\n",
"content='' additional_kwargs={'tool_calls': [{'id': 'call_Fg92zoL8oS5q6im2jR1INRvH', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 97, 'total_tokens': 111}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_729ea513f7', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-fc2e201f-6330-4330-8c4e-1a66e85c1ffa-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_Fg92zoL8oS5q6im2jR1INRvH'}]\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='a931dd6e-2ed7-42ea-a58c-5ffb4041d7c9' tool_call_id='call_Fg92zoL8oS5q6im2jR1INRvH'\n",
"content='It seems there is an issue with processing the request for the value of `magic_function(3)`. Let me try a different approach.' additional_kwargs={'tool_calls': [{'id': 'call_lbYBMptprZ6HMqNiTvoqhmwP', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 43, 'prompt_tokens': 130, 'total_tokens': 173}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_729ea513f7', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-2e0baab0-c4c1-42e8-b49d-a2704ae977c0-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_lbYBMptprZ6HMqNiTvoqhmwP'}]\n",
"content='Sorry, there was an error. Please try again.' name='magic_function' id='9957435a-5de3-4662-b23c-abfa31e71208' tool_call_id='call_lbYBMptprZ6HMqNiTvoqhmwP'\n",
"content='It appears that the `magic_function` is currently experiencing issues when attempting to process the input \"3\". Unfortunately, I can\\'t provide the value of `magic_function(3)` at this moment.\\n\\nIf you have any other questions or need assistance with something else, please let me know!' response_metadata={'token_usage': {'completion_tokens': 58, 'prompt_tokens': 195, 'total_tokens': 253}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_729ea513f7', 'finish_reason': 'stop', 'logprobs': None} id='run-bb68d7ca-da76-43ad-80ab-23737a70c391-0'\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
]
}
],
"source": [
"from langgraph.pregel import GraphRecursionError\n",
"from langgraph.errors import GraphRecursionError\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"RECURSION_LIMIT = 2 * 3 + 1\n",
"\n",
"app = chat_agent_executor.create_tool_calling_executor(model, tools)\n",
"app = create_react_agent(model, tools=tools)\n",
"\n",
"try:\n",
" for chunk in app.stream(\n",
" {\"messages\": [(\"human\", query)]}, {\"recursion_limit\": RECURSION_LIMIT}\n",
" {\"messages\": [(\"human\", query)]},\n",
" {\"recursion_limit\": RECURSION_LIMIT},\n",
" stream_mode=\"values\",\n",
" ):\n",
" print(chunk)\n",
" print(\"------\")\n",
" print(chunk[\"messages\"][-1])\n",
"except GraphRecursionError:\n",
" print({\"input\": query, \"output\": \"Agent stopped due to max iterations.\"})"
]
@@ -513,7 +522,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 30,
"id": "4b8498fc-a7af-4164-a401-d8714f082306",
"metadata": {},
"outputs": [
@@ -540,7 +549,7 @@
" 'output': 'Agent stopped due to max iterations.'}"
]
},
"execution_count": 17,
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -569,9 +578,19 @@
"agent_executor.invoke({\"input\": query})"
]
},
{
"cell_type": "markdown",
"id": "d02eb025",
"metadata": {},
"source": [
"With LangGraph's react agent, you can control timeouts on two levels. \n",
"\n",
"You can set a `step_timeout` to bound each **step**:"
]
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 31,
"id": "a2b29113-e6be-4f91-aa4c-5c63dea3e423",
"metadata": {},
"outputs": [
@@ -579,14 +598,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_lp2tuTmBpulORJr4FJp9za4E', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 65, 'total_tokens': 79}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-4070a5d8-c2ea-46f3-a3a2-dfcd2ebdadc2-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_lp2tuTmBpulORJr4FJp9za4E'}])]}}\n",
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_GlXWTlJ0jQc2B8jQuDVFzmnc', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 64, 'total_tokens': 78}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_729ea513f7', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-38a0459b-a363-4181-b7a3-f25cb5c5d728-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_GlXWTlJ0jQc2B8jQuDVFzmnc'}])]}}\n",
"------\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
]
}
],
"source": [
"app = chat_agent_executor.create_tool_calling_executor(model, tools)\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"app = create_react_agent(model, tools=tools)\n",
"# Set the max timeout for each step here\n",
"app.step_timeout = 2\n",
"\n",
@@ -598,13 +619,52 @@
" print({\"input\": query, \"output\": \"Agent stopped due to max iterations.\"})"
]
},
{
"cell_type": "markdown",
"id": "32a9db70",
"metadata": {},
"source": [
"The other way to set a max timeout is just via python's stdlib [asyncio](https://docs.python.org/3/library/asyncio.html)."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 34,
"id": "e9eb55f4-a321-4bac-b52d-9e43b411cf92",
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_cR1oJuYcNrOmcaaIRRvh5dSr', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 64, 'total_tokens': 78}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_729ea513f7', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-1c03c5d6-4883-4ccd-aa78-53dbafa99622-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_cR1oJuYcNrOmcaaIRRvh5dSr'}])]}}\n",
"------\n",
"{'action': {'messages': [ToolMessage(content='Sorry, there was an error. Please try again.', name='magic_function', id='596baf13-de35-4a4f-8b78-475b387a1f40', tool_call_id='call_cR1oJuYcNrOmcaaIRRvh5dSr')]}}\n",
"------\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Task Cancelled.'}\n"
]
}
],
"source": [
"import asyncio\n",
"\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"app = create_react_agent(model, tools=tools)\n",
"\n",
"\n",
"async def stream(app, inputs):\n",
" async for chunk in app.astream({\"messages\": [(\"human\", query)]}):\n",
" print(chunk)\n",
" print(\"------\")\n",
"\n",
"\n",
"try:\n",
" task = asyncio.create_task(stream(app, {\"messages\": [(\"human\", query)]}))\n",
" await asyncio.wait_for(task, timeout=3)\n",
"except TimeoutError:\n",
" print(\"Task Cancelled.\")"
]
}
],
"metadata": {
@@ -623,7 +683,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
"version": "3.11.2"
}
},
"nbformat": 4,

View File

@@ -52,7 +52,7 @@
"source": [
"loaders = [\n",
" TextLoader(\"../../paul_graham_essay.txt\"),\n",
" TextLoader(\"../../state_of_the_union.txt\"),\n",
" TextLoader(\"state_of_the_union.txt\"),\n",
"]\n",
"docs = []\n",
"for loader in loaders:\n",
@@ -143,7 +143,7 @@
{
"data": {
"text/plain": [
"Document(page_content='Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.', metadata={'doc_id': '2fd77862-9ed5-4fad-bf76-e487b747b333', 'source': '../../state_of_the_union.txt'})"
"Document(page_content='Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.', metadata={'doc_id': '2fd77862-9ed5-4fad-bf76-e487b747b333', 'source': 'state_of_the_union.txt'})"
]
},
"execution_count": 8,

View File

@@ -7,23 +7,22 @@
"source": [
"# How to parse JSON output\n",
"\n",
"While some model providers support [built-in ways to return structured output](/docs/how_to/structured_output), not all do. We can use an output parser to help users to specify an arbitrary JSON schema via the prompt, query a model for outputs that conform to that schema, and finally parse that schema as JSON.\n",
":::info Prerequisites\n",
"\n",
":::{.callout-note}\n",
"Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed JSON.\n",
":::\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [Output parsers](/docs/concepts/#output-parsers)\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Structured output](/docs/how_to/structured_output)\n",
"- [Chaining runnables together](/docs/how_to/sequence/)\n",
"`}/>\n",
"```"
"\n",
":::\n",
"\n",
"While some model providers support [built-in ways to return structured output](/docs/how_to/structured_output), not all do. We can use an output parser to help users to specify an arbitrary JSON schema via the prompt, query a model for outputs that conform to that schema, and finally parse that schema as JSON.\n",
"\n",
":::{.callout-note}\n",
"Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed JSON.\n",
":::"
]
},
{
@@ -255,7 +254,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -94,7 +94,7 @@
"source": [
"## LCEL\n",
"\n",
"Output parsers implement the [Runnable interface](/docs/expression_language/interface), the basic building block of the [LangChain Expression Language (LCEL)](/docs/expression_language/). This means they support `invoke`, `ainvoke`, `stream`, `astream`, `batch`, `abatch`, `astream_log` calls.\n",
"Output parsers implement the [Runnable interface](/docs/concepts#interface), the basic building block of the [LangChain Expression Language (LCEL)](/docs/concepts#langchain-expression-language). This means they support `invoke`, `ainvoke`, `stream`, `astream`, `batch`, `abatch`, `astream_log` calls.\n",
"\n",
"Output parsers accept a string or `BaseMessage` as input and can return an arbitrary type."
]

View File

@@ -7,6 +7,17 @@
"source": [
"# How to parse XML output\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [Output parsers](/docs/concepts/#output-parsers)\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Structured output](/docs/how_to/structured_output)\n",
"- [Chaining runnables together](/docs/how_to/sequence/)\n",
"\n",
":::\n",
"\n",
"LLMs from different providers often have different strengths depending on the specific data they are trianed on. This also means that some may be \"better\" and more reliable at generating output in formats other than JSON.\n",
"\n",
"This guide shows you how to use the [`XMLOutputParser`](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html) to prompt models for XML output, then and parse that output into a usable format.\n",
@@ -15,17 +26,6 @@
"Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed XML.\n",
":::\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [Output parsers](/docs/concepts/#output-parsers)\n",
"- [Structured output](/docs/how_to/structured_output)\n",
"- [Chaining runnables together](/docs/how_to/sequence/)\n",
"`}/>\n",
"```\n",
"\n",
"In the following examples, we use Anthropic's Claude-2 model (https://docs.anthropic.com/claude/docs), which is one such model that is optimized for XML tags."
]
},
@@ -274,7 +274,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -7,24 +7,24 @@
"source": [
"# How to parse YAML output\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [Output parsers](/docs/concepts/#output-parsers)\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Structured output](/docs/how_to/structured_output)\n",
"- [Chaining runnables together](/docs/how_to/sequence/)\n",
"\n",
":::\n",
"\n",
"LLMs from different providers often have different strengths depending on the specific data they are trianed on. This also means that some may be \"better\" and more reliable at generating output in formats other than JSON.\n",
"\n",
"This output parser allows users to specify an arbitrary schema and query LLMs for outputs that conform to that schema, using YAML to format their response.\n",
"\n",
":::{.callout-note}\n",
"Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed YAML.\n",
":::\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [Output parsers](/docs/concepts/#output-parsers)\n",
"- [Structured output](/docs/how_to/structured_output)\n",
"- [Chaining runnables together](/docs/how_to/sequence/)\n",
"`}/>\n",
"```"
":::\n"
]
},
{
@@ -165,7 +165,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -18,16 +18,15 @@
"source": [
"# How to invoke runnables in parallel\n",
"\n",
"The [`RunnableParallel`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableParallel.html) primitive is essentially a dict whose values are runnables (or things that can be coerced to runnables, like functions). It runs all of its values in parallel, and each value is called with the overall input of the `RunnableParallel`. The final return value is a dict with the results of each value under its appropriate key.\n",
":::info Prerequisites\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence)\n",
"`} />\n",
"```\n",
"\n",
":::\n",
"\n",
"The [`RunnableParallel`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableParallel.html) primitive is essentially a dict whose values are runnables (or things that can be coerced to runnables, like functions). It runs all of its values in parallel, and each value is called with the overall input of the `RunnableParallel`. The final return value is a dict with the results of each value under its appropriate key.\n",
"\n",
"## Formatting with `RunnableParallels`\n",
"\n",
@@ -354,7 +353,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -58,7 +58,7 @@
"source": [
"loaders = [\n",
" TextLoader(\"../../paul_graham_essay.txt\"),\n",
" TextLoader(\"../../state_of_the_union.txt\"),\n",
" TextLoader(\"state_of_the_union.txt\"),\n",
"]\n",
"docs = []\n",
"for loader in loaders:\n",

View File

@@ -18,18 +18,18 @@
"source": [
"# How to pass through arguments from one step to the next\n",
"\n",
"When composing chains with several steps, sometimes you will want to pass data from previous steps unchanged for use as input to a later step. The [`RunnablePassthrough`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html) class allows you to do just this, and is typically is used in conjuction with a [RunnableParallel](/docs/how_to/parallel/) to pass data through to a later step in your constructed chains.\n",
":::info Prerequisites\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"- [Calling runnables in parallel](/docs/how_to/parallel/)\n",
"- [Custom functions](/docs/how_to/functions/)\n",
"`} />\n",
"```\n",
"\n",
":::\n",
"\n",
"\n",
"When composing chains with several steps, sometimes you will want to pass data from previous steps unchanged for use as input to a later step. The [`RunnablePassthrough`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html) class allows you to do just this, and is typically is used in conjuction with a [RunnableParallel](/docs/how_to/parallel/) to pass data through to a later step in your constructed chains.\n",
"\n",
"See the example below:"
]
@@ -174,7 +174,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -17,13 +17,14 @@
"source": [
"# How to compose prompts together\n",
"\n",
"LangChain provides a user friendly interface for composing different parts of prompts together. You can do this with either string prompts or chat prompts. Constructing prompts this way allows for easy reuse of components.\n",
":::info Prerequisites\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"\n",
"<PrerequisiteLinks content={`- [Prompt templates](/docs/concepts/#prompt-templates)`} />\n",
"```"
":::\n",
"\n",
"LangChain provides a user friendly interface for composing different parts of prompts together. You can do this with either string prompts or chat prompts. Constructing prompts this way allows for easy reuse of components."
]
},
{
@@ -306,7 +307,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -17,6 +17,13 @@
"source": [
"# How to partially format prompt templates\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"\n",
":::\n",
"\n",
"Like partially binding arguments to a function, it can make sense to \"partial\" a prompt template - e.g. pass in a subset of the required values, as to create a new prompt template which expects only the remaining subset of values.\n",
"\n",
"LangChain supports this in two ways:\n",
@@ -26,14 +33,6 @@
"\n",
"In the examples below, we go over the motivations for both use cases as well as how to do it in LangChain.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"`}/>\n",
"```\n",
"\n",
"## Partial with strings\n",
"\n",
"One common use case for wanting to partial a prompt template is if you get access to some of the variables in a prompt before others. For example, suppose you have a prompt template that requires two variables, `foo` and `baz`. If you get the `foo` value early on in your chain, but the `baz` value later, it can be inconvenient to pass both variables all the way through the chain. Instead, you can partial the prompt template with the `foo` value, and then pass the partialed prompt template along and just use that. Below is an example of doing this:\n"
@@ -191,7 +190,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -19,7 +19,7 @@
"\n",
"As our query analysis becomes more complex, the LLM may struggle to understand how exactly it should respond in certain scenarios. In order to improve performance here, we can add examples to the prompt to guide the LLM.\n",
"\n",
"Let's take a look at how we can add examples for the LangChain YouTube video query analyzer we built in the [Quickstart](/docs/use_cases/query_analysis/quickstart)."
"Let's take a look at how we can add examples for the LangChain YouTube video query analyzer we built in the [Quickstart](/docs/tutorials/query_analysis)."
]
},
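The notebook's actual mechanism for injecting examples is not shown in this hunk; below is only a generic, hypothetical sketch of folding question/query examples into the analyzer prompt. The example strings and prompt wording are invented for illustration.

```python
# Hypothetical sketch: render a handful of question -> query examples into the
# system prompt that guides the query analyzer. Not the notebook's exact approach.
from langchain_core.prompts import ChatPromptTemplate

examples = [
    ("videos on chat langchain published in 2023", "search: chat langchain | year: 2023"),
    ("how to use multi-modal models in an agent", "search: multi-modal models agent"),
]
example_block = "\n\n".join(f"Question: {q}\nQuery: {a}" for q, a in examples)

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Convert user questions into search queries.\n\n"
            "Here are examples of questions and the queries they should produce:\n\n{examples}",
        ),
        ("human", "{question}"),
    ]
)

print(prompt.invoke({"examples": example_block, "question": "RAG videos from 2024"}))
```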
{

View File

@@ -48,7 +48,7 @@
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"# Load example document\n",
"with open(\"../../../docs/modules/state_of_the_union.txt\") as f:\n",
"with open(\"state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(\n",

View File

@@ -0,0 +1,354 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "6bd1219b-f31c-41b0-95e6-3204ad894ac7",
"metadata": {},
"source": [
"# Response metadata\n",
"\n",
"Many model providers include some metadata in their chat generation responses. This metadata can be accessed via the `AIMessage.response_metadata: Dict` attribute. Depending on the model provider and model configuration, this can contain information like [token counts](/docs/how_to/chat_token_usage_tracking), [logprobs](/docs/how_to/logprobs), and more.\n",
"\n",
"Here's what the response metadata looks like for a few different providers:\n",
"\n",
"## OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "161f5898-9976-4a75-943d-03eda1a40a60",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'token_usage': {'completion_tokens': 164,\n",
" 'prompt_tokens': 17,\n",
" 'total_tokens': 181},\n",
" 'model_name': 'gpt-4-turbo',\n",
" 'system_fingerprint': 'fp_76f018034d',\n",
" 'finish_reason': 'stop',\n",
" 'logprobs': None}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-4-turbo\")\n",
"msg = llm.invoke([(\"human\", \"What's the oldest known example of cuneiform\")])\n",
"msg.response_metadata"
]
},
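As the outputs in this notebook show, the shape of `response_metadata` varies by provider (OpenAI nests counts under `token_usage`, Anthropic and Bedrock under `usage`). Below is a small hypothetical helper, not part of the notebook, that normalizes token counts across those layouts.

```python
# Hypothetical helper (not in the notebook): pull input/output token counts out of
# response_metadata regardless of whether the provider uses "token_usage" or "usage".
def token_counts(msg):
    meta = msg.response_metadata
    usage = meta.get("token_usage") or meta.get("usage") or {}
    return {
        "input_tokens": usage.get("prompt_tokens", usage.get("input_tokens")),
        "output_tokens": usage.get("completion_tokens", usage.get("output_tokens")),
    }
```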
{
"cell_type": "markdown",
"id": "98eab683-df03-44a1-a034-ebbe7c6851b6",
"metadata": {},
"source": [
"## Anthropic"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "61c43496-83b5-4d71-bd60-3e6d46c62a5e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'id': 'msg_01CzQyD7BX8nkhDNfT1QqvEp',\n",
" 'model': 'claude-3-sonnet-20240229',\n",
" 'stop_reason': 'end_turn',\n",
" 'stop_sequence': None,\n",
" 'usage': {'input_tokens': 17, 'output_tokens': 296}}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_anthropic import ChatAnthropic\n",
"\n",
"llm = ChatAnthropic(model=\"claude-3-sonnet-20240229\")\n",
"msg = llm.invoke([(\"human\", \"What's the oldest known example of cuneiform\")])\n",
"msg.response_metadata"
]
},
{
"cell_type": "markdown",
"id": "c1f24f69-18f6-43c1-8b26-3f88ec515259",
"metadata": {},
"source": [
"## Google VertexAI"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "39549336-25f5-4839-9846-f687cd77e59b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'is_blocked': False,\n",
" 'safety_ratings': [{'category': 'HARM_CATEGORY_HATE_SPEECH',\n",
" 'probability_label': 'NEGLIGIBLE',\n",
" 'blocked': False},\n",
" {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT',\n",
" 'probability_label': 'NEGLIGIBLE',\n",
" 'blocked': False},\n",
" {'category': 'HARM_CATEGORY_HARASSMENT',\n",
" 'probability_label': 'NEGLIGIBLE',\n",
" 'blocked': False},\n",
" {'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT',\n",
" 'probability_label': 'NEGLIGIBLE',\n",
" 'blocked': False}],\n",
" 'citation_metadata': None,\n",
" 'usage_metadata': {'prompt_token_count': 10,\n",
" 'candidates_token_count': 30,\n",
" 'total_token_count': 40}}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_google_vertexai import ChatVertexAI\n",
"\n",
"llm = ChatVertexAI(model=\"gemini-pro\")\n",
"msg = llm.invoke([(\"human\", \"What's the oldest known example of cuneiform\")])\n",
"msg.response_metadata"
]
},
{
"cell_type": "markdown",
"id": "bc4ef8bb-eee3-4266-b530-0af9b3b79fe9",
"metadata": {},
"source": [
"## Bedrock (Anthropic)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "1e4ac668-4c6a-48ad-9a6f-7b291477b45d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'model_id': 'anthropic.claude-v2',\n",
" 'usage': {'prompt_tokens': 19, 'completion_tokens': 371, 'total_tokens': 390}}"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_aws import ChatBedrock\n",
"\n",
"llm = ChatBedrock(model_id=\"anthropic.claude-v2\")\n",
"msg = llm.invoke([(\"human\", \"What's the oldest known example of cuneiform\")])\n",
"msg.response_metadata"
]
},
{
"cell_type": "markdown",
"id": "ee040d15-5575-4309-a9e9-aed5a09c78e3",
"metadata": {},
"source": [
"## MistralAI"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "deb41321-52d0-4795-a40c-4a811a13d7b0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'token_usage': {'prompt_tokens': 19,\n",
" 'total_tokens': 141,\n",
" 'completion_tokens': 122},\n",
" 'model': 'mistral-small',\n",
" 'finish_reason': 'stop'}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_mistralai import ChatMistralAI\n",
"\n",
"llm = ChatMistralAI()\n",
"msg = llm.invoke([(\"human\", \"What's the oldest known example of cuneiform\")])\n",
"msg.response_metadata"
]
},
{
"cell_type": "markdown",
"id": "297c7be4-9505-48ac-96c0-4dc2047cfe7f",
"metadata": {},
"source": [
"## Groq"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "744e14ec-ff50-4642-9893-ff7bdf8927ff",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'token_usage': {'completion_time': 0.243,\n",
" 'completion_tokens': 132,\n",
" 'prompt_time': 0.022,\n",
" 'prompt_tokens': 22,\n",
" 'queue_time': None,\n",
" 'total_time': 0.265,\n",
" 'total_tokens': 154},\n",
" 'model_name': 'mixtral-8x7b-32768',\n",
" 'system_fingerprint': 'fp_7b44c65f25',\n",
" 'finish_reason': 'stop',\n",
" 'logprobs': None}"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_groq import ChatGroq\n",
"\n",
"llm = ChatGroq()\n",
"msg = llm.invoke([(\"human\", \"What's the oldest known example of cuneiform\")])\n",
"msg.response_metadata"
]
},
{
"cell_type": "markdown",
"id": "7cdeec00-8a8f-422a-8819-47c646578b65",
"metadata": {},
"source": [
"## TogetherAI"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a984118e-a731-4864-bcea-7dc6c6b3d139",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'token_usage': {'completion_tokens': 208,\n",
" 'prompt_tokens': 20,\n",
" 'total_tokens': 228},\n",
" 'model_name': 'mistralai/Mixtral-8x7B-Instruct-v0.1',\n",
" 'system_fingerprint': None,\n",
" 'finish_reason': 'eos',\n",
" 'logprobs': None}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(\n",
" base_url=\"https://api.together.xyz/v1\",\n",
" api_key=os.environ[\"TOGETHER_API_KEY\"],\n",
" model=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
")\n",
"msg = llm.invoke([(\"human\", \"What's the oldest known example of cuneiform\")])\n",
"msg.response_metadata"
]
},
{
"cell_type": "markdown",
"id": "3d5e0614-8dc2-4948-a0b5-dc76c7837a5a",
"metadata": {},
"source": [
"## FireworksAI"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "6ae32a93-26db-41bb-95c2-38ddd5085fbe",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'token_usage': {'prompt_tokens': 19,\n",
" 'total_tokens': 219,\n",
" 'completion_tokens': 200},\n",
" 'model_name': 'accounts/fireworks/models/mixtral-8x7b-instruct',\n",
" 'system_fingerprint': '',\n",
" 'finish_reason': 'length',\n",
" 'logprobs': None}"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_fireworks import ChatFireworks\n",
"\n",
"llm = ChatFireworks(model=\"accounts/fireworks/models/mixtral-8x7b-instruct\")\n",
"msg = llm.invoke([(\"human\", \"What's the oldest known example of cuneiform\")])\n",
"msg.response_metadata"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "poetry-venv-2",
"language": "python",
"name": "poetry-venv-2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -18,6 +18,17 @@
"source": [
"# How to route execution within a chain\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"- [Configuring chain parameters at runtime](/docs/how_to/configure)\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Chat Messages](/docs/concepts/#message-types)\n",
"\n",
":::\n",
"\n",
"Routing allows you to create non-deterministic chains where the output of a previous step defines the next step. Routing can help provide structure and consistency around interactions with models by allowing you to define states and use information related to those states as context to model calls.\n",
"\n",
"There are two ways to perform routing:\n",
@@ -25,19 +36,7 @@
"1. Conditionally return runnables from a [`RunnableLambda`](/docs/how_to/functions) (recommended)\n",
"2. Using a `RunnableBranch` (legacy)\n",
"\n",
"We'll illustrate both methods using a two step sequence where the first step classifies an input question as being about `LangChain`, `Anthropic`, or `Other`, then routes to a corresponding prompt chain.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Chaining runnables](/docs/how_to/sequence/)\n",
"- [Configuring chain parameters at runtime](/docs/how_to/configure)\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Chat Messages](/docs/concepts/#message-types)\n",
"`} />\n",
"```"
"We'll illustrate both methods using a two step sequence where the first step classifies an input question as being about `LangChain`, `Anthropic`, or `Other`, then routes to a corresponding prompt chain."
]
},
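A self-contained sketch of the recommended approach (conditionally returning runnables from a function wrapped in `RunnableLambda`). The stand-in chains and the hard-coded classifier below are placeholders so the snippet runs without any model provider configured; the notebook's real example uses prompt/model chains instead.

```python
from langchain_core.runnables import RunnableLambda

# Stand-in "expert chains" so the sketch runs without a configured model provider.
langchain_chain = RunnableLambda(lambda x: "LangChain expert answer: " + x["question"])
anthropic_chain = RunnableLambda(lambda x: "Anthropic expert answer: " + x["question"])
general_chain = RunnableLambda(lambda x: "general answer: " + x["question"])

def route(info):
    # Conditionally return a runnable based on the classified topic.
    topic = info["topic"].lower()
    if "anthropic" in topic:
        return anthropic_chain
    if "langchain" in topic:
        return langchain_chain
    return general_chain

full_chain = {
    "topic": RunnableLambda(lambda x: "LangChain"),  # stand-in for the classification step
    "question": lambda x: x["question"],
} | RunnableLambda(route)

print(full_chain.invoke({"question": "How do I chain runnables with LCEL?"}))
```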
{
@@ -474,7 +473,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -52,7 +52,7 @@
"outputs": [],
"source": [
"# This is a long document we can split up.\n",
"with open(\"../../../docs/modules/state_of_the_union.txt\") as f:\n",
"with open(\"state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()"
]
},

View File

@@ -16,24 +16,23 @@
"source": [
"# How to chain runnables\n",
"\n",
"One point about [LangChain Expression Language](/docs/concepts/#langchain-expression-language) is that any two runnables can be \"chained\" together into sequences. The output of the previous runnable's `.invoke()` call is passed as input to the next runnable. This can be done using the pipe operator (`|`), or the more explicit `.pipe()` method, which does the same thing.\n",
":::info Prerequisites\n",
"\n",
"The resulting [`RunnableSequence`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableSequence.html) is itself a runnable, which means it can be invoked, streamed, or further chained just like any other runnable. Advantages of chaining runnables in this way are efficient streaming (the sequence will stream output as soon as it is available), and debugging and tracing with tools like [LangSmith](/docs/how_to/debugging).\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"This guide assumes familiarity with the following concepts:\n",
"- [LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language)\n",
"- [Prompt templates](/docs/concepts/#prompt-templates)\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [Output parser](/docs/concepts/#output-parsers)\n",
"`}/>\n",
"```\n",
"\n",
":::\n",
"\n",
"One point about [LangChain Expression Language](/docs/concepts/#langchain-expression-language) is that any two runnables can be \"chained\" together into sequences. The output of the previous runnable's `.invoke()` call is passed as input to the next runnable. This can be done using the pipe operator (`|`), or the more explicit `.pipe()` method, which does the same thing.\n",
"\n",
"The resulting [`RunnableSequence`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableSequence.html) is itself a runnable, which means it can be invoked, streamed, or further chained just like any other runnable. Advantages of chaining runnables in this way are efficient streaming (the sequence will stream output as soon as it is available), and debugging and tracing with tools like [LangSmith](/docs/how_to/debugging).\n",
"\n",
"## The pipe operator\n",
"\n",
"To show off how this works, let's go through an example. We'll walk through a common pattern in LangChain: using a [prompt template](/docs/modules/model_io/prompts/) to format input into a [chat model](/docs/modules/model_io/chat/), and finally converting the chat message output into a string with an [output parser](/docs/modules/model_io/output_parsers/).\n",
"To show off how this works, let's go through an example. We'll walk through a common pattern in LangChain: using a [prompt template](/docs/how_to#prompt-templates) to format input into a [chat model](/docs/how_to#chat-models), and finally converting the chat message output into a string with an [output parser](/docs/how_to#output-parsers).\n",
"\n",
"```{=mdx}\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
@@ -255,9 +254,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

View File

@@ -50,7 +50,7 @@
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"# This is a long document we can split up.\n",
"with open(\"../../../docs/modules/state_of_the_union.txt\") as f:\n",
"with open(\"state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()"
]
},
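For context, a minimal sketch of how the loaded text is typically split in this notebook's surrounding cells; the parameter values are illustrative, not taken from this diff (assumes `langchain-text-splitters` is installed and `state_of_the_union.txt` is present).

```python
from langchain_text_splitters import CharacterTextSplitter

with open("state_of_the_union.txt") as f:
    state_of_the_union = f.read()

text_splitter = CharacterTextSplitter(
    separator="\n\n",   # split on blank lines
    chunk_size=1000,    # target characters per chunk
    chunk_overlap=200,  # characters shared between adjacent chunks
)
texts = text_splitter.create_documents([state_of_the_union])
print(len(texts), texts[0].page_content[:100])
```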
@@ -199,7 +199,7 @@
"outputs": [],
"source": [
"# This is a long document we can split up.\n",
"with open(\"../../../docs/modules/state_of_the_union.txt\") as f:\n",
"with open(\"state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()"
]
},
@@ -392,7 +392,7 @@
"outputs": [],
"source": [
"# This is a long document we can split up.\n",
"with open(\"../../../docs/modules/state_of_the_union.txt\") as f:\n",
"with open(\"state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()"
]
},
@@ -596,7 +596,7 @@
"outputs": [],
"source": [
"# This is a long document we can split up.\n",
"with open(\"../../../state_of_the_union.txt\") as f:\n",
"with open(\"state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n",
"from langchain_text_splitters import CharacterTextSplitter"
]

View File

@@ -0,0 +1,723 @@
Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.
Last year COVID-19 kept us apart. This year we are finally together again.
Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans.
With a duty to one another to the American people to the Constitution.
And with an unwavering resolve that freedom will always triumph over tyranny.
Six days ago, Russias Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated.
He thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined.
He met the Ukrainian people.
From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.
Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland.
In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight.
Let each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world.
Please rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people.
Throughout our history weve learned this lesson when dictators do not pay a price for their aggression they cause more chaos.
They keep moving.
And the costs and the threats to America and the world keep rising.
Thats why the NATO Alliance was created to secure peace and stability in Europe after World War 2.
The United States is a member along with 29 other nations.
It matters. American diplomacy matters. American resolve matters.
Putins latest attack on Ukraine was premeditated and unprovoked.
He rejected repeated efforts at diplomacy.
He thought the West and NATO wouldnt respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did.
We prepared extensively and carefully.
We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin.
I spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression.
We countered Russias lies with truth.
And now that he has acted the free world is holding him accountable.
Along with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland.
We are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than ever.
Together with our allies we are right now enforcing powerful economic sanctions.
We are cutting off Russias largest banks from the international financial system.
Preventing Russias central bank from defending the Russian Ruble making Putins $630 Billion “war fund” worthless.
We are choking off Russias access to technology that will sap its economic strength and weaken its military for years to come.
Tonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more.
The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs.
We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains.
And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights further isolating Russia and adding an additional squeeze on their economy. The Ruble has lost 30% of its value.
The Russian stock market has lost 40% of its value and trading remains suspended. Russias economy is reeling and Putin alone is to blame.
Together with our allies we are providing support to the Ukrainians in their fight for freedom. Military assistance. Economic assistance. Humanitarian assistance.
We are giving more than $1 Billion in direct assistance to Ukraine.
And we will continue to aid the Ukrainian people as they defend their country and to help ease their suffering.
Let me be clear, our forces are not engaged and will not engage in conflict with Russian forces in Ukraine.
Our forces are not going to Europe to fight in Ukraine, but to defend our NATO Allies in the event that Putin decides to keep moving west.
For that purpose weve mobilized American ground forces, air squadrons, and ship deployments to protect NATO countries including Poland, Romania, Latvia, Lithuania, and Estonia.
As I have made crystal clear the United States and our Allies will defend every inch of territory of NATO countries with the full force of our collective power.
And we remain clear-eyed. The Ukrainians are fighting back with pure courage. But the next few days weeks, months, will be hard on them.
Putin has unleashed violence and chaos. But while he may make gains on the battlefield he will pay a continuing high price over the long run.
And a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards.
To all Americans, I will be honest with you, as Ive always promised. A Russian dictator, invading a foreign country, has costs around the world.
And Im taking robust action to make sure the pain of our sanctions is targeted at Russias economy. And I will use every tool at our disposal to protect American businesses and consumers.
Tonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world.
America will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies.
These steps will help blunt gas prices here at home. And I know the news about whats happening can seem alarming.
But I want you to know that we are going to be okay.
When the history of this era is written Putins war on Ukraine will have left Russia weaker and the rest of the world stronger.
While it shouldnt have taken something so terrible for people around the world to see whats at stake now everyone sees it clearly.
We see the unity among leaders of nations and a more unified Europe a more unified West. And we see unity among the people who are gathering in cities in large crowds around the world even in Russia to demonstrate their support for Ukraine.
In the battle between democracy and autocracy, democracies are rising to the moment, and the world is clearly choosing the side of peace and security.
This is a real test. Its going to take time. So let us continue to draw inspiration from the iron will of the Ukrainian people.
To our fellow Ukrainian Americans who forge a deep bond that connects our two nations we stand with you.
Putin may circle Kyiv with tanks, but he will never gain the hearts and souls of the Ukrainian people.
He will never extinguish their love of freedom. He will never weaken the resolve of the free world.
We meet tonight in an America that has lived through two of the hardest years this nation has ever faced.
The pandemic has been punishing.
And so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more.
I understand.
I remember when my Dad had to leave our home in Scranton, Pennsylvania to find work. I grew up in a family where if the price of food went up, you felt it.
Thats why one of the first things I did as President was fight to pass the American Rescue Plan.
Because people were hurting. We needed to act, and we did.
Few pieces of legislation have done more in a critical moment in our history to lift us out of crisis.
It fueled our efforts to vaccinate the nation and combat COVID-19. It delivered immediate economic relief for tens of millions of Americans.
Helped put food on their table, keep a roof over their heads, and cut the cost of health insurance.
And as my Dad used to say, it gave people a little breathing room.
And unlike the $2 Trillion tax cut passed in the previous administration that benefitted the top 1% of Americans, the American Rescue Plan helped working people—and left no one behind.
And it worked. It created jobs. Lots of jobs.
In fact—our economy created over 6.5 Million new jobs just last year, more jobs created in one year
than ever before in the history of America.
Our economy grew at a rate of 5.7% last year, the strongest growth in nearly 40 years, the first step in bringing fundamental change to an economy that hasnt worked for the working people of this nation for too long.
For the past 40 years we were told that if we gave tax breaks to those at the very top, the benefits would trickle down to everyone else.
But that trickle-down theory led to weaker economic growth, lower wages, bigger deficits, and the widest gap between those at the top and everyone else in nearly a century.
Vice President Harris and I ran for office with a new economic vision for America.
Invest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up
and the middle out, not from the top down.
Because we know that when the middle class grows, the poor have a ladder up and the wealthy do very well.
America used to have the best roads, bridges, and airports on Earth.
Now our infrastructure is ranked 13th in the world.
We wont be able to compete for the jobs of the 21st Century if we dont fix that.
Thats why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history.
This was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen.
Were done talking about infrastructure weeks.
Were going to have an infrastructure decade.
It is going to transform America and put us on a path to win the economic competition of the 21st Century that we face with the rest of the world—particularly with China.
As Ive told Xi Jinping, it is never a good bet to bet against the American people.
Well create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America.
And well do it all to withstand the devastating effects of the climate crisis and promote environmental justice.
Well build a national network of 500,000 electric vehicle charging stations, begin to replace poisonous lead pipes—so every child—and every American—has clean water to drink at home and at school, provide affordable high-speed internet for every American—urban, suburban, rural, and tribal communities.
4,000 projects have already been announced.
And tonight, Im announcing that this year we will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair.
When we use taxpayer dollars to rebuild America we are going to Buy American: buy American products to support American jobs.
The federal government spends about $600 Billion a year to keep the country safe and secure.
Theres been a law on the books for almost a century
to make sure taxpayers dollars support American jobs and businesses.
Every Administration says theyll do it, but we are actually doing it.
We will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America.
But to compete for the best jobs of the future, we also need to level the playing field with China and other competitors.
Thats why it is so important to pass the Bipartisan Innovation Act sitting in Congress that will make record investments in emerging technologies and American manufacturing.
Let me give you one example of why its so important to pass it.
If you travel 20 miles east of Columbus, Ohio, youll find 1,000 empty acres of land.
It wont look like much, but if you stop and look closely, youll see a “Field of dreams,” the ground on which Americas future will be built.
This is where Intel, the American company that helped build Silicon Valley, is going to build its $20 billion semiconductor “mega site”.
Up to eight state-of-the-art factories in one place. 10,000 new good-paying jobs.
Some of the most sophisticated manufacturing in the world to make computer chips the size of a fingertip that power the world and our everyday lives.
Smartphones. The Internet. Technology we have yet to invent.
But thats just the beginning.
Intels CEO, Pat Gelsinger, who is here tonight, told me they are ready to increase their investment from
$20 billion to $100 billion.
That would be one of the biggest investments in manufacturing in American history.
And all theyre waiting for is for you to pass this bill.
So lets not wait any longer. Send it to my desk. Ill sign it.
And we will really take off.
And Intel is not alone.
Theres something happening in America.
Just look around and youll see an amazing story.
The rebirth of the pride that comes from stamping products “Made In America.” The revitalization of American manufacturing.
Companies are choosing to build new factories here, when just a few years ago, they would have built them overseas.
Thats what is happening. Ford is investing $11 billion to build electric vehicles, creating 11,000 jobs across the country.
GM is making the largest investment in its history—$7 billion to build electric vehicles, creating 4,000 jobs in Michigan.
All told, we created 369,000 new manufacturing jobs in America just last year.
Powered by people Ive met like JoJo Burgess, from generations of union steelworkers from Pittsburgh, whos here with us tonight.
As Ohio Senator Sherrod Brown says, “Its time to bury the label “Rust Belt.”
Its time.
But with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills.
Inflation is robbing them of the gains they might otherwise feel.
I get it. Thats why my top priority is getting prices under control.
Look, our economy roared back faster than most predicted, but the pandemic meant that businesses had a hard time hiring enough workers to keep up production in their factories.
The pandemic also disrupted global supply chains.
When factories close, it takes longer to make goods and get them from the warehouse to the store, and prices go up.
Look at cars.
Last year, there werent enough semiconductors to make all the cars that people wanted to buy.
And guess what, prices of automobiles went up.
So—we have a choice.
One way to fight inflation is to drive down wages and make Americans poorer.
I have a better plan to fight inflation.
Lower your costs, not your wages.
Make more cars and semiconductors in America.
More infrastructure and innovation in America.
More goods moving faster and cheaper in America.
More jobs where you can earn a good living in America.
And instead of relying on foreign supply chains, lets make it in America.
Economists call it “increasing the productive capacity of our economy.”
I call it building a better America.
My plan to fight inflation will lower your costs and lower the deficit.
17 Nobel laureates in economics say my plan will ease long-term inflationary pressures. Top business leaders and most Americans support my plan. And heres the plan:
First cut the cost of prescription drugs. Just look at insulin. One in ten Americans has diabetes. In Virginia, I met a 13-year-old boy named Joshua Davis.
He and his Dad both have Type 1 diabetes, which means they need insulin every day. Insulin costs about $10 a vial to make.
But drug companies charge families like Joshua and his Dad up to 30 times more. I spoke with Joshuas mom.
Imagine what its like to look at your child who needs insulin and have no idea how youre going to pay for it.
What it does to your dignity, your ability to look your child in the eye, to be the parent you expect to be.
Joshua is here with us tonight. Yesterday was his birthday. Happy birthday, buddy.
For Joshua, and for the 200,000 other young people with Type 1 diabetes, lets cap the cost of insulin at $35 a month so everyone can afford it.
Drug companies will still do very well. And while were at it let Medicare negotiate lower prices for prescription drugs, like the VA already does.
Look, the American Rescue Plan is helping millions of families on Affordable Care Act plans save $2,400 a year on their health care premiums. Lets close the coverage gap and make those savings permanent.
Second cut energy costs for families an average of $500 a year by combatting climate change.
Lets provide investments and tax credits to weatherize your homes and businesses to be energy efficient and you get a tax credit; double Americas clean energy production in solar, wind, and so much more; lower the price of electric vehicles, saving you another $80 a month because youll never have to pay at the gas pump again.
Third cut the cost of child care. Many families pay up to $14,000 a year for child care per child.
Middle-class and working families shouldnt have to pay more than 7% of their income for care of young children.
My plan will cut the cost in half for most families and help parents, including millions of women, who left the workforce during the pandemic because they couldnt afford child care, to be able to get back to work.
My plan doesnt stop there. It also includes home and long-term care. More affordable housing. And Pre-K for every 3- and 4-year-old.
All of these will lower costs.
And under my plan, nobody earning less than $400,000 a year will pay an additional penny in new taxes. Nobody.
The one thing all Americans agree on is that the tax system is not fair. We have to fix it.
Im not looking to punish anyone. But lets make sure corporations and the wealthiest Americans start paying their fair share.
Just last year, 55 Fortune 500 corporations earned $40 billion in profits and paid zero dollars in federal income tax.
Thats simply not fair. Thats why Ive proposed a 15% minimum tax rate for corporations.
We got more than 130 countries to agree on a global minimum tax rate so companies cant get out of paying their taxes at home by shipping jobs and factories overseas.
Thats why Ive proposed closing loopholes so the very wealthy dont pay a lower tax rate than a teacher or a firefighter.
So thats my plan. It will grow the economy and lower costs for families.
So what are we waiting for? Lets get this done. And while youre at it, confirm my nominees to the Federal Reserve, which plays a critical role in fighting inflation.
My plan will not only lower costs to give families a fair shot, it will lower the deficit.
The previous Administration not only ballooned the deficit with tax cuts for the very wealthy and corporations, it undermined the watchdogs whose job was to keep pandemic relief funds from being wasted.
But in my administration, the watchdogs have been welcomed back.
Were going after the criminals who stole billions in relief money meant for small businesses and millions of Americans.
And tonight, Im announcing that the Justice Department will name a chief prosecutor for pandemic fraud.
By the end of this year, the deficit will be down to less than half what it was before I took office.
The only president ever to cut the deficit by more than one trillion dollars in a single year.
Lowering your costs also means demanding more competition.
Im a capitalist, but capitalism without competition isnt capitalism.
Its exploitation—and it drives up prices.
When corporations dont have to compete, their profits go up, your prices go up, and small businesses and family farmers and ranchers go under.
We see it happening with ocean carriers moving goods in and out of America.
During the pandemic, these foreign-owned companies raised prices by as much as 1,000% and made record profits.
Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers.
And as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up.
That ends on my watch.
Medicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect.
Well also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees.
Lets pass the Paycheck Fairness Act and paid leave.
Raise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty.
Lets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.
And lets pass the PRO Act when a majority of workers want to form a union—they shouldnt be stopped.
When we invest in our workers, when we build the economy from the bottom up and the middle out together, we can do something we havent done in a long time: build a better America.
For more than two years, COVID-19 has impacted every decision in our lives and the life of the nation.
And I know youre tired, frustrated, and exhausted.
But I also know this.
Because of the progress weve made, because of your resilience and the tools we have, tonight I can say
we are moving forward safely, back to more normal routines.
Weve reached a new moment in the fight against COVID-19, with severe cases down to a level not seen since last July.
Just a few days ago, the Centers for Disease Control and Prevention—the CDC—issued new mask guidelines.
Under these new guidelines, most Americans in most of the country can now be mask free.
And based on the projections, more of the country will reach that point across the next couple of weeks.
Thanks to the progress we have made this past year, COVID-19 need no longer control our lives.
I know some are talking about “living with COVID-19”. Tonight I say that we will never just accept living with COVID-19.
We will continue to combat the virus as we do other diseases. And because this is a virus that mutates and spreads, we will stay on guard.
Here are four common sense steps as we move forward safely.
First, stay protected with vaccines and treatments. We know how incredibly effective vaccines are. If youre vaccinated and boosted you have the highest degree of protection.
We will never give up on vaccinating more Americans. Now, I know parents with kids under 5 are eager to see a vaccine authorized for their children.
The scientists are working hard to get that done and well be ready with plenty of vaccines when they do.
Were also ready with anti-viral treatments. If you get COVID-19, the Pfizer pill reduces your chances of ending up in the hospital by 90%.
Weve ordered more of these pills than anyone in the world. And Pfizer is working overtime to get us 1 Million pills this month and more than double that next month.
And were launching the “Test to Treat” initiative so people can get tested at a pharmacy, and if theyre positive, receive antiviral pills on the spot at no cost.
If youre immunocompromised or have some other vulnerability, we have treatments and free high-quality masks.
Were leaving no one behind or ignoring anyones needs as we move forward.
And on testing, we have made hundreds of millions of tests available for you to order for free.
Even if you already ordered free tests tonight, I am announcing that you can order more from covidtests.gov starting next week.
Second we must prepare for new variants. Over the past year, weve gotten much better at detecting new variants.
If necessary, well be able to deploy new vaccines within 100 days instead of many more months or years.
And, if Congress provides the funds we need, well have new stockpiles of tests, masks, and pills ready if needed.
I cannot promise a new variant wont come. But I can promise you well do everything within our power to be ready if it does.
Third we can end the shutdown of schools and businesses. We have the tools we need.
Its time for Americans to get back to work and fill our great downtowns again. People working from home can feel safe to begin to return to the office.
Were doing that here in the federal government. The vast majority of federal workers will once again work in person.
Our schools are open. Lets keep it that way. Our kids need to be in school.
And with 75% of adult Americans fully vaccinated and hospitalizations down by 77%, most Americans can remove their masks, return to work, stay in the classroom, and move forward safely.
We achieved this because we provided free vaccines, treatments, tests, and masks.
Of course, continuing this costs money.
I will soon send Congress a request.
The vast majority of Americans have used these tools and may want to again, so I expect Congress to pass it quickly.
Fourth, we will continue vaccinating the world.
Weve sent 475 Million vaccine doses to 112 countries, more than any other nation.
And we wont stop.
We have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life.
Lets use this moment to reset. Lets stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease.
Lets stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans.
We cant change how divided weve been. But we can change how we move forward—on COVID-19 and other issues we must face together.
I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera.
They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun.
Officer Mora was 27 years old.
Officer Rivera was 22.
Both Dominican Americans whod grown up on the same streets they later chose to patrol as police officers.
I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves.
Ive worked on these issues a long time.
I know what works: Investing in crime prevention and community police officers wholl walk the beat, wholl know the neighborhood, and who can restore trust and safety.
So lets not abandon our streets. Or choose between safety and equal justice.
Lets come together to protect our communities, restore trust, and hold law enforcement accountable.
Thats why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers.
Thats why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope.
We should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities.
I ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe.
And I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and cant be traced.
And I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon?
Ban assault weapons and high-capacity magazines.
Repeal the liability shield that makes gun manufacturers the only industry in America that cant be sued.
These laws dont infringe on the Second Amendment. They save lives.
The most fundamental right in America is the right to vote and to have it counted. And its under assault.
In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections.
We cannot let this happen.
Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections.
Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.
One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.
A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.
And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.
We can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling.
Weve set up joint patrols with Mexico and Guatemala to catch more human traffickers.
Were putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster.
Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.
We can do all this while keeping lit the torch of liberty that has led generations of immigrants to this land—my forefathers and so many of yours.
Provide a pathway to citizenship for Dreamers, those on temporary status, farm workers, and essential workers.
Revise our laws so businesses have the workers they need and families dont wait decades to reunite.
Its not only the right thing to do—its the economically smart thing to do.
Thats why immigration reform is supported by everyone from labor unions to religious leaders to the U.S. Chamber of Commerce.
Lets get it done once and for all.
Advancing liberty and justice also requires protecting the rights of women.
The constitutional right affirmed in Roe v. Wade—standing precedent for half a century—is under attack as never before.
If we want to go forward—not backward—we must protect access to health care. Preserve a womans right to choose. And lets continue to advance maternal health care in America.
And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong.
As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential.
While it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.
And soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things.
So tonight Im offering a Unity Agenda for the Nation. Four big things we can do together.
First, beat the opioid epidemic.
There is so much we can do. Increase funding for prevention, treatment, harm reduction, and recovery.
Get rid of outdated rules that stop doctors from prescribing treatments. And stop the flow of illicit drugs by working with state and local law enforcement to go after traffickers.
If youre suffering from addiction, know you are not alone. I believe in recovery, and I celebrate the 23 million Americans in recovery.
Second, lets take on mental health. Especially among our children, whose lives and education have been turned upside down.
The American Rescue Plan gave schools money to hire teachers and help students make up for lost learning.
I urge every parent to make sure your school does just that. And we can all play a part—sign up to be a tutor or a mentor.
Children were also struggling before the pandemic. Bullying, violence, trauma, and the harms of social media.
As Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment theyre conducting on our children for profit.
Its time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children.
And lets get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care.
Third, support our veterans.
Veterans are the best of us.
Ive always believed that we have a sacred obligation to equip all those we send to war and care for them and their families when they come home.
My administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free.
Our troops in Iraq and Afghanistan faced many dangers.
One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more.
When they came home, many of the worlds fittest and best trained warriors were never the same.
Headaches. Numbness. Dizziness.
A cancer that would put them in a flag-draped coffin.
I know.
One of those soldiers was my son Major Beau Biden.
We dont know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops.
But Im committed to finding out everything we can.
Committed to military families like Danielle Robinson from Ohio.
The widow of Sergeant First Class Heath Robinson.
He was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq.
Stationed near Baghdad, just yards from burn pits the size of football fields.
Heaths widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter.
But cancer from prolonged exposure to burn pits ravaged Heaths lungs and body.
Danielle says Heath was a fighter to the very end.
He didnt know how to stop fighting, and neither did she.
Through her pain she found purpose to demand we do better.
Tonight, Danielle—we are.
The VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits.
And tonight, Im announcing were expanding eligibility to veterans suffering from nine respiratory cancers.
Im also calling on Congress: pass a law to make sure veterans devastated by toxic exposures in Iraq and Afghanistan finally get the benefits and comprehensive health care they deserve.
And fourth, lets end cancer as we know it.
This is personal to me and Jill, to Kamala, and to so many of you.
Cancer is the #2 cause of death in Americasecond only to heart disease.
Last month, I announced our plan to supercharge
the Cancer Moonshot that President Obama asked me to lead six years ago.
Our goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases.
More support for patients and families.
To get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health.
Its based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more.
ARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimers, diabetes, and more.
A unity agenda for the nation.
We can do this.
My fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy.
In this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things.
We have fought for freedom, expanded liberty, defeated totalitarianism and terror.
And built the strongest, freest, and most prosperous nation the world has ever known.
Now is the hour.
Our moment of responsibility.
Our test of resolve and conscience, of history itself.
It is in this moment that our character is formed. Our purpose is found. Our future is forged.
Well I know this nation.
We will meet the test.
To protect freedom and liberty, to expand fairness and opportunity.
We will save democracy.
As hard as these times have been, I am more optimistic about America today than I have been my whole life.
Because I see the future that is within our grasp.
Because I know there is simply nothing beyond our capacity.
We are the only nation on Earth that has always turned every crisis we have faced into an opportunity.
The only nation that can be defined by a single word: possibilities.
So on this night, in our 245th year as a nation, I have come to report on the State of the Union.
And my report is this: the State of the Union is strong—because you, the American people, are strong.
We are stronger today than we were a year ago.
And we will be stronger a year from now than we are today.
Now is our moment to meet and overcome the challenges of our time.
And we will, as one people.
One America.
The United States of America.
May God bless you all. May God protect our troops.

View File

@@ -15,11 +15,20 @@
"id": "bb7d49db-04d3-4399-bfe1-09f82bbe6015",
"metadata": {},
"source": [
"# How to stream\n",
"# How to stream runnables\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [LangChain Expression Language](/docs/concepts/#langchain-expression-language)\n",
"- [Output parsers](/docs/concepts/#output-parsers)\n",
"\n",
":::\n",
"\n",
"Streaming is critical in making applications based on LLMs feel responsive to end-users.\n",
"\n",
"Important LangChain primitives like [chat models](/docs/concepts/#chat-models), [output parsers](/docs/concepts/#output-parsers), [prompts](/docs/concepts/#prompt-templates), [retrievers](/docs/concepts/#retrievers), and [agents](/docs/concepts/#agents) implement the LangChain [Runnable Interface](/docs/expression_language/interface).\n",
"Important LangChain primitives like [chat models](/docs/concepts/#chat-models), [output parsers](/docs/concepts/#output-parsers), [prompts](/docs/concepts/#prompt-templates), [retrievers](/docs/concepts/#retrievers), and [agents](/docs/concepts/#agents) implement the LangChain [Runnable Interface](/docs/concepts#interface).\n",
"\n",
"This interface provides two general approaches to stream content:\n",
"\n",
@@ -28,16 +37,6 @@
"\n",
"Let's take a look at both approaches, and try to understand how to use them.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [LangChain Expression Language](/docs/concepts/#langchain-expression-language)\n",
"- [Output parsers](/docs/concepts/#output-parsers)\n",
"`} />\n",
"```\n",
"\n",
"## Using Stream\n",
"\n",
"All `Runnable` objects implement a sync method called `stream` and an async variant called `astream`. \n",
@@ -246,9 +245,9 @@
"id": "868bc412",
"metadata": {},
"source": [
"You might notice above that `parser` actually doesn't block the streaming output from the model, and instead processes each chunk individually. Many of the [LCEL primitives](/docs/expression_language/primitives) also support this kind of transform-style passthrough streaming, which can be very convenient when constructing apps.\n",
"You might notice above that `parser` actually doesn't block the streaming output from the model, and instead processes each chunk individually. Many of the [LCEL primitives](/docs/how_to#langchain-expression-language-lcel) also support this kind of transform-style passthrough streaming, which can be very convenient when constructing apps.\n",
"\n",
"Certain runnables, like [prompt templates](/docs/modules/model_io/prompts) and [chat models](/docs/modules/model_io/chat), cannot process individual chunks and instead aggregate all previous steps. This will interrupt the streaming process. Custom functions can be [designed to return generators](/docs/expression_language/primitives/functions#streaming), which"
"Certain runnables, like [prompt templates](/docs/how_to#prompt-templates) and [chat models](/docs/how_to#chat-models), cannot process individual chunks and instead aggregate all previous steps. This will interrupt the streaming process. Custom functions can be [designed to return generators](/docs/how_to/functions#streaming), which"
]
},
{
@@ -1464,7 +1463,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -17,31 +17,34 @@
"source": [
"# How to return structured data from a model\n",
"\n",
"It is often useful to have a model return output that matches some specific schema. One common use-case is extracting data from arbitrary text to insert into a traditional database or use with some other downstrem system. This guide will show you a few different strategies you can use to do this.\n",
":::info Prerequisites\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"`}/>\n",
"```\n",
"- [Function/tool calling](/docs/concepts/#functiontool-calling)\n",
":::\n",
"\n",
"It is often useful to have a model return output that matches a specific schema. One common use-case is extracting data from text to insert into a database or use with some other downstream system. This guide covers a few strategies for getting structured outputs from a model.\n",
"\n",
"## The `.with_structured_output()` method\n",
"\n",
"There are several strategies that models can use under the hood. For some of the most popular model providers, including [OpenAI](/docs/integrations/platforms/openai/), [Anthropic](/docs/integrations/platforms/anthropic/), and [Mistral](/docs/integrations/providers/mistralai/), LangChain implements a common interface that abstracts away these strategies called `.with_structured_output`.\n",
":::info Supported models\n",
"\n",
"By invoking this method (and passing in [JSON schema](https://json-schema.org/) or a [Pydantic](https://docs.pydantic.dev/latest/) model) the model will add whatever model parameters + output parsers are necessary to get back structured output matching the requested schema. If the model supports more than one way to do this (e.g., function calling vs JSON mode) - you can configure which method to use by passing into that method.\n",
"You can find a [list of models that support this method here](/docs/integrations/chat/).\n",
"\n",
"You can find the [current list of models that support this method here](/docs/integrations/chat/).\n",
":::\n",
"\n",
"Let's look at some examples of this in action! We'll use Pydantic to create a simple response schema.\n",
"This is the easiest and most reliable way to get structured outputs. `with_structured_output()` is implemented for models that provide native APIs for structuring outputs, like tool/function calling or JSON mode, and makes use of these capabilities under the hood.\n",
"\n",
"This method takes a schema as input which specifies the names, types, and descriptions of the desired output attributes. The method returns a model-like Runnable, except that instead of outputting strings or Messages it outputs objects corresponding to the given schema. The schema can be specified as a [JSON Schema](https://json-schema.org/) or a Pydantic class. If JSON Schema is used then a dictionary will be returned by the Runnable, and if a Pydantic class is used then Pydantic objects will be returned.\n",
"\n",
"As an example, let's get a model to generate a joke and separate the setup from the punchline:\n",
"\n",
"```{=mdx}\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
"\n",
"<ChatModelTabs\n",
" customVarName=\"model\"\n",
" customVarName=\"llm\"\n",
"/>\n",
"```"
]
@@ -58,25 +61,30 @@
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(\n",
" model=\"gpt-4-0125-preview\",\n",
" temperature=0,\n",
")"
"llm = ChatOpenAI(model=\"gpt-4-0125-preview\", temperature=0)"
]
},
{
"cell_type": "markdown",
"id": "a808a401-be1f-49f9-ad13-58dd68f7db5f",
"metadata": {},
"source": [
"If we want the model to return a Pydantic object, we just need to pass in desired the Pydantic class:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 38,
"id": "070bf702",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=None)"
"Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=None)"
]
},
"execution_count": 13,
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@@ -88,35 +96,39 @@
"\n",
"\n",
"class Joke(BaseModel):\n",
" \"\"\"Joke to tell user.\"\"\"\n",
"\n",
" setup: str = Field(description=\"The setup of the joke\")\n",
" punchline: str = Field(description=\"The punchline to the joke\")\n",
" rating: Optional[int] = Field(description=\"How funny the joke is, from 1 to 10\")\n",
"\n",
"\n",
"structured_llm = model.with_structured_output(Joke)\n",
"structured_llm = llm.with_structured_output(Joke)\n",
"\n",
"structured_llm.invoke(\"Tell me a joke about cats\")"
]
},
{
"cell_type": "markdown",
"id": "00890a47-3cdf-4805-b8f1-6d110f0633d3",
"metadata": {},
"source": [
":::tip\n",
"Beyond just the structure of the Pydantic class, the name of the Pydantic class, the docstring, and the names and provided descriptions of parameters are very important. Most of the time `with_structured_output` is using a model's function/tool calling API, and you can effectively think of all of this information as being added to the model prompt.\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "deddb6d3",
"metadata": {},
"source": [
"The result is a Pydantic model. Note that name of the model and the names and provided descriptions of parameters are very important, as they help guide the model's output.\n",
"\n",
"We can also pass in an OpenAI-style JSON schema dict if you prefer not to use Pydantic. This dict should contain three properties:\n",
"\n",
"- `name`: The name of the schema to output.\n",
"- `description`: A high level description of the schema to output.\n",
"- `parameters`: The nested details of the schema you want to extract, formatted as a [JSON schema](https://json-schema.org/) dict.\n",
"\n",
"In this case, the response is also a dict:"
"We can also pass in a [JSON Schema](https://json-schema.org/) dict if you prefer not to use Pydantic. In this case, the response is also a dict:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 8,
"id": "6700994a",
"metadata": {},
"outputs": [
@@ -124,30 +136,37 @@
"data": {
"text/plain": [
"{'setup': 'Why was the cat sitting on the computer?',\n",
" 'punchline': 'To keep an eye on the mouse!'}"
" 'punchline': 'Because it wanted to keep an eye on the mouse!',\n",
" 'rating': 8}"
]
},
"execution_count": 3,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"structured_llm = model.with_structured_output(\n",
" {\n",
" \"name\": \"joke\",\n",
" \"description\": \"Joke to tell user.\",\n",
" \"parameters\": {\n",
" \"title\": \"Joke\",\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"setup\": {\"type\": \"string\", \"description\": \"The setup for the joke\"},\n",
" \"punchline\": {\"type\": \"string\", \"description\": \"The joke's punchline\"},\n",
" },\n",
" \"required\": [\"setup\", \"punchline\"],\n",
"json_schema = {\n",
" \"title\": \"joke\",\n",
" \"description\": \"Joke to tell user.\",\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"setup\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The setup of the joke\",\n",
" },\n",
" }\n",
")\n",
" \"punchline\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The punchline to the joke\",\n",
" },\n",
" \"rating\": {\n",
" \"type\": \"integer\",\n",
" \"description\": \"How funny the joke is, from 1 to 10\",\n",
" },\n",
" },\n",
" \"required\": [\"setup\", \"punchline\"],\n",
"}\n",
"structured_llm = llm.with_structured_output(json_schema)\n",
"\n",
"structured_llm.invoke(\"Tell me a joke about cats\")"
]
@@ -159,7 +178,7 @@
"source": [
"### Choosing between multiple schemas\n",
"\n",
"If you have multiple schemas that are valid outputs for the model, you can use Pydantic's `Union` type:"
"The simplest way to let the model choose from multiple schemas is to create a parent Pydantic class that has a Union-typed attribute:"
]
},
{
@@ -171,7 +190,7 @@
{
"data": {
"text/plain": [
"Response(output=Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!'))"
"Response(output=Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=8))"
]
},
"execution_count": 4,
@@ -182,15 +201,10 @@
"source": [
"from typing import Union\n",
"\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"\n",
"\n",
"class Joke(BaseModel):\n",
" setup: str = Field(description=\"The setup of the joke\")\n",
" punchline: str = Field(description=\"The punchline to the joke\")\n",
"\n",
"\n",
"class ConversationalResponse(BaseModel):\n",
" \"\"\"Respond in a conversational manner. Be kind and helpful.\"\"\"\n",
"\n",
" response: str = Field(description=\"A conversational response to the user's query\")\n",
"\n",
"\n",
@@ -198,7 +212,7 @@
" output: Union[Joke, ConversationalResponse]\n",
"\n",
"\n",
"structured_llm = model.with_structured_output(Response)\n",
"structured_llm = llm.with_structured_output(Response)\n",
"\n",
"structured_llm.invoke(\"Tell me a joke about cats\")"
]
@@ -212,7 +226,7 @@
{
"data": {
"text/plain": [
"Response(output=ConversationalResponse(response=\"I'm just a collection of code, so I don't have feelings, but thanks for asking! How can I assist you today?\"))"
"Response(output=ConversationalResponse(response=\"I'm just a digital assistant, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?\"))"
]
},
"execution_count": 5,
@@ -229,9 +243,225 @@
"id": "e28c14d3",
"metadata": {},
"source": [
"If you are using JSON Schema, you can take advantage of other more complex schema descriptions to create a similar effect.\n",
"Alternatively, you can use tool calling directly to allow the model to choose between options, if your [chosen model supports it](/docs/integrations/chat/). This involves a bit more parsing and setup but in some instances leads to better performance because you don't have to use nested schemas. See [this how-to guide](/docs/how_to/tool_calling/) for more details."
]
},
{
"cell_type": "markdown",
"id": "9a40f703-7fd2-4fe0-ab2a-fa2d711ba009",
"metadata": {},
"source": [
"### Streaming\n",
"\n",
"You can also use tool calling directly to allow the model to choose between options, if your chosen model supports it. This involves a bit more parsing and setup. See [this how-to guide](/docs/how_to/tool_calling/) for more details."
"We can stream outputs from our structured model when the output type is a dict (i.e., when the schema is specified as a JSON Schema dict). \n",
"\n",
":::info\n",
"\n",
"Note that what's yielded is already aggregated chunks, not deltas.\n",
"\n",
":::"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "aff89877-28a3-472f-a1aa-eff893fe7736",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{}\n",
"{'setup': ''}\n",
"{'setup': 'Why'}\n",
"{'setup': 'Why was'}\n",
"{'setup': 'Why was the'}\n",
"{'setup': 'Why was the cat'}\n",
"{'setup': 'Why was the cat sitting'}\n",
"{'setup': 'Why was the cat sitting on'}\n",
"{'setup': 'Why was the cat sitting on the'}\n",
"{'setup': 'Why was the cat sitting on the computer'}\n",
"{'setup': 'Why was the cat sitting on the computer?'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': ''}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 8}\n"
]
}
],
"source": [
"structured_llm = llm.with_structured_output(json_schema)\n",
"\n",
"for chunk in structured_llm.stream(\"Tell me a joke about cats\"):\n",
" print(chunk)"
]
},
{
"cell_type": "markdown",
"id": "0a526cdf-e736-451b-96be-22e8986d3863",
"metadata": {},
"source": [
"### Few-shot prompting\n",
"\n",
"For more complex schemas it's very useful to add few-shot examples to the prompt. This can be done in a few ways.\n",
"\n",
"The simplest and most universal way is to add examples to a system message in the prompt:"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "283ba784-2072-47ee-9b2c-1119e3c69e8e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'setup': 'Woodpecker',\n",
" 'punchline': \"Woodpecker goes 'knock knock', but don't worry, they never expect you to answer the door!\",\n",
" 'rating': 8}"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"system = \"\"\"You are a hilarious comedian. Your specialty is knock-knock jokes. \\\n",
"Return a joke which has the setup (the response to \"Who's there?\") and the final punchline (the response to \"<setup> who?\").\n",
"\n",
"Here are some examples of jokes:\n",
"\n",
"example_user: Tell me a joke about planes\n",
"example_assistant: {{\"setup\": \"Why don't planes ever get tired?\", \"punchline\": \"Because they have rest wings!\", \"rating\": 2}}\n",
"\n",
"example_user: Tell me another joke about planes\n",
"example_assistant: {{\"setup\": \"Cargo\", \"punchline\": \"Cargo 'vroom vroom', but planes go 'zoom zoom'!\", \"rating\": 10}}\n",
"\n",
"example_user: Now about caterpillars\n",
"example_assistant: {{\"setup\": \"Caterpillar\", \"punchline\": \"Caterpillar really slow, but watch me turn into a butterfly and steal the show!\", \"rating\": 5}}\"\"\"\n",
"\n",
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", \"{input}\")])\n",
"\n",
"few_shot_structured_llm = prompt | structured_llm\n",
"few_shot_structured_llm.invoke(\"what's something funny about woodpeckers\")"
]
},
{
"cell_type": "markdown",
"id": "3c12b389-153d-44d1-af34-37e5b926d3db",
"metadata": {},
"source": [
"When the underlying method for structuring outputs is tool calling, we can pass in our examples as explicit tool calls. You can check if the model you're using makes use of tool calling in its API reference."
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "d7381cb0-b2c3-4302-a319-ed72d0b9e43f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'setup': 'Crocodile',\n",
" 'punchline': \"Crocodile 'see you later', but in a while, it becomes an alligator!\",\n",
" 'rating': 7}"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.messages import AIMessage, HumanMessage, ToolMessage\n",
"\n",
"examples = [\n",
" HumanMessage(\"Tell me a joke about planes\", name=\"example_user\"),\n",
" AIMessage(\n",
" \"\",\n",
" name=\"example_assistant\",\n",
" tool_calls=[\n",
" {\n",
" \"name\": \"joke\",\n",
" \"args\": {\n",
" \"setup\": \"Why don't planes ever get tired?\",\n",
" \"punchline\": \"Because they have rest wings!\",\n",
" \"rating\": 2,\n",
" },\n",
" \"id\": \"1\",\n",
" }\n",
" ],\n",
" ),\n",
" # Most tool-calling models expect a ToolMessage(s) to follow an AIMessage with tool calls.\n",
" ToolMessage(\"\", tool_call_id=\"1\"),\n",
" # Some models also expect an AIMessage to follow any ToolMessages,\n",
" # so you may need to add an AIMessage here.\n",
" HumanMessage(\"Tell me another joke about planes\", name=\"example_user\"),\n",
" AIMessage(\n",
" \"\",\n",
" name=\"example_assistant\",\n",
" tool_calls=[\n",
" {\n",
" \"name\": \"joke\",\n",
" \"args\": {\n",
" \"setup\": \"Cargo\",\n",
" \"punchline\": \"Cargo 'vroom vroom', but planes go 'zoom zoom'!\",\n",
" \"rating\": 10,\n",
" },\n",
" \"id\": \"2\",\n",
" }\n",
" ],\n",
" ),\n",
" ToolMessage(\"\", tool_call_id=\"2\"),\n",
" HumanMessage(\"Now about caterpillars\", name=\"example_user\"),\n",
" AIMessage(\n",
" \"\",\n",
" tool_calls=[\n",
" {\n",
" \"name\": \"joke\",\n",
" \"args\": {\n",
" \"setup\": \"Caterpillar\",\n",
" \"punchline\": \"Caterpillar really slow, but watch me turn into a butterfly and steal the show!\",\n",
" \"rating\": 5,\n",
" },\n",
" \"id\": \"3\",\n",
" }\n",
" ],\n",
" ),\n",
" ToolMessage(\"\", tool_call_id=\"3\"),\n",
"]\n",
"system = \"\"\"You are a hilarious comedian. Your specialty is knock-knock jokes. \\\n",
"Return a joke which has the setup (the response to \"Who's there?\") \\\n",
"and the final punchline (the response to \"<setup> who?\").\"\"\"\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [(\"system\", system), (\"placeholder\", \"{examples}\"), (\"human\", \"{input}\")]\n",
")\n",
"few_shot_structured_llm = prompt | structured_llm\n",
"few_shot_structured_llm.invoke({\"input\": \"crocodiles\", \"examples\": examples})"
]
},
{
"cell_type": "markdown",
"id": "498d893b-ceaa-47ff-a9d8-4faa60702715",
"metadata": {},
"source": [
"For more on few shot prompting when using tool calling, see [here](/docs/how_to/function_calling/#Few-shot-prompting)."
]
},
{
@@ -239,9 +469,17 @@
"id": "39d7a555",
"metadata": {},
"source": [
"### Specifying the output method (Advanced)\n",
"### (Advanced) Specifying the method for structuring outputs\n",
"\n",
"For models that support more than one means of outputting data, you can specify the preferred one like this:"
"For models that support more than one means of structuring outputs (i.e., they support both tool calling and JSON mode), you can specify which method to use with the `method=` argument.\n",
"\n",
":::info JSON mode\n",
"\n",
"If using JSON mode you'll have to still specify the desired schema in the model prompt. The schema you pass to `with_structured_output` will only be used for parsing the model outputs, it will not be passed to the model the way it is with tool calling.\n",
"\n",
"To see if the model you're using supports JSON mode, check its entry in the [API reference](https://api.python.langchain.com/en/latest/langchain_api_reference.html).\n",
"\n",
":::"
]
},
{
@@ -253,7 +491,7 @@
{
"data": {
"text/plain": [
"Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!')"
"Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=None)"
]
},
"execution_count": 6,
@@ -262,7 +500,7 @@
}
],
"source": [
"structured_llm = model.with_structured_output(Joke, method=\"json_mode\")\n",
"structured_llm = llm.with_structured_output(Joke, method=\"json_mode\")\n",
"\n",
"structured_llm.invoke(\n",
" \"Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys\"\n",
@@ -274,13 +512,9 @@
"id": "5e92a98a",
"metadata": {},
"source": [
"In the above example, we use OpenAI's alternate JSON mode capability along with a more specific prompt.\n",
"## Prompting and parsing model directly\n",
"\n",
"For specifics about the model you choose, peruse its entry in the [API reference pages](https://api.python.langchain.com/en/latest/langchain_api_reference.html).\n",
"\n",
"## Prompting techniques\n",
"\n",
"You can also prompt models to outputting information in a given format. This approach relies on designing good prompts and then parsing the output of the models. This is the only option for models that don't support `.with_structured_output()` or other built-in approaches.\n",
"Not all models support `.with_structured_output()`, since not all models have tool calling or JSON mode support. For such models you'll need to directly prompt the model to use a specific format, and use an output parser to extract the structured response from the raw model output.\n",
"\n",
"### Using `PydanticOutputParser`\n",
"\n",
@@ -289,14 +523,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 31,
"id": "6e514455",
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"\n",
"from langchain.output_parsers import PydanticOutputParser\n",
"from langchain_core.output_parsers import PydanticOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"\n",
@@ -341,7 +575,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 37,
"id": "3d73d33d",
"metadata": {},
"outputs": [
@@ -366,7 +600,7 @@
"source": [
"query = \"Anna is 23 years old and she is 6 feet tall\"\n",
"\n",
"print(prompt.format_prompt(query=query).to_string())"
"print(prompt.invoke(query).to_string())"
]
},
{
@@ -395,7 +629,7 @@
}
],
"source": [
"chain = prompt | model | parser\n",
"chain = prompt | llm | parser\n",
"\n",
"chain.invoke({\"query\": query})"
]
@@ -538,35 +772,17 @@
}
],
"source": [
"chain = prompt | model | extract_json\n",
"chain = prompt | llm | extract_json\n",
"\n",
"chain.invoke({\"query\": query})"
]
},
{
"cell_type": "markdown",
"id": "7a39221a",
"metadata": {},
"source": [
"## Next steps\n",
"\n",
"Now you've learned a few methods to make a model output structured data.\n",
"\n",
"To learn more, check out the other how-to guides in this section, or the conceptual guide on tool calling."
]
},
{
"cell_type": "markdown",
"id": "6e3759e2",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "poetry-venv-2",
"language": "python",
"name": "python3"
"name": "poetry-venv-2"
},
"language_info": {
"codemirror_mode": {
@@ -578,7 +794,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -6,6 +6,14 @@
"source": [
"# How to use a chat model to call tools\n",
"\n",
":::info Prerequisites\n",
"\n",
"This guide assumes familiarity with the following concepts:\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [LangChain Tools](/docs/concepts/#tools)\n",
"\n",
":::\n",
"\n",
"```{=mdx}\n",
":::info\n",
"We use the term tool calling interchangeably with function calling. Although\n",
@@ -40,15 +48,6 @@
"LangChain implements standard interfaces for defining tools, passing them to LLMs, \n",
"and representing tool calls. This guide will show you how to use them.\n",
"\n",
"```{=mdx}\n",
"import PrerequisiteLinks from \"@theme/PrerequisiteLinks\";\n",
"\n",
"<PrerequisiteLinks content={`\n",
"- [Chat models](/docs/concepts/#chat-models)\n",
"- [LangChain Tools](/docs/concepts/#tools)\n",
"`} />\n",
"```\n",
"\n",
"## Passing tools to chat models\n",
"\n",
"Chat models that support tool calling features implement a `.bind_tools` method, which \n",
@@ -226,7 +225,7 @@
"are populated in the `.invalid_tool_calls` attribute. An `InvalidToolCall` can have \n",
"a name, string arguments, identifier, and error message.\n",
"\n",
"If desired, [output parsers](/docs/modules/model_io/output_parsers) can further \n",
"If desired, [output parsers](/docs/how_to#output-parsers) can further \n",
"process the output. For example, we can convert back to the original Pydantic class:"
]
},
@@ -309,7 +308,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that adding message chunks will merge their corresponding tool call chunks. This is the principle by which LangChain's various [tool output parsers](/docs/modules/model_io/output_parsers/types/openai_tools/) support streaming.\n",
"Note that adding message chunks will merge their corresponding tool call chunks. This is the principle by which LangChain's various [tool output parsers](/docs/how_to/output_parser_structured) support streaming.\n",
"\n",
"For example, below we accumulate tool call chunks:"
]
@@ -685,7 +684,7 @@
"\n",
"Now you've learned how to bind tool schemas to a chat model and to call those tools. Next, check out some more specific uses of tool calling:\n",
"\n",
"- Building [tool-using chains and agents](/docs/use_cases/tool_use/)\n",
"- Building [tool-using chains and agents](/docs/how_to#tools)\n",
"- Getting [structured outputs](/docs/how_to/structured_output/) from models"
]
}
@@ -706,9 +705,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
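As a quick illustration of the `.bind_tools` flow this notebook describes, here is a hedged sketch; the `multiply` tool and the OpenAI model are assumptions for the example, not the notebook's exact code.

```python
# Sketch: bind a tool to a chat model and read back parsed tool calls.
# Assumes langchain-openai is installed and OPENAI_API_KEY is set.
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


llm_with_tools = ChatOpenAI(model="gpt-3.5-turbo-0125").bind_tools([multiply])

ai_msg = llm_with_tools.invoke("What is 3 * 12?")
# Tool calls are surfaced in LangChain's standard format, e.g.
# [{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': '...'}]
print(ai_msg.tool_calls)
```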

View File

@@ -0,0 +1,160 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4facdf7f-680e-4d28-908b-2b8408e2a741",
"metadata": {},
"source": [
"# How to call tools with multi-modal data\n",
"\n",
"Here we demonstrate how to call tools with multi-modal data, such as images.\n",
"\n",
"Some multi-modal models, such as those that can reason over images or audio, support [tool calling](/docs/concepts/#functiontool-calling) features as well.\n",
"\n",
"To call tools using such models, simply bind tools to them in the [usual way](/docs/how_to/tool_calling), and invoke the model using content blocks of the desired type (e.g., containing image data).\n",
"\n",
"Below, we demonstrate examples using [OpenAI](/docs/integrations/platforms/openai) and [Anthropic](/docs/integrations/platforms/anthropic). We will use the same image and tool in all cases. Let's first select an image, and build a placeholder tool that expects as input the string \"sunny\", \"cloudy\", or \"rainy\". We will ask the models to describe the weather in the image."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0d9fd81a-b7f0-445a-8e3d-cfc2d31fdd59",
"metadata": {},
"outputs": [],
"source": [
"from typing import Literal\n",
"\n",
"from langchain_core.tools import tool\n",
"\n",
"image_url = \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n",
"\n",
"\n",
"@tool\n",
"def weather_tool(weather: Literal[\"sunny\", \"cloudy\", \"rainy\"]) -> None:\n",
" \"\"\"Describe the weather\"\"\"\n",
" pass"
]
},
{
"cell_type": "markdown",
"id": "8656018e-c56d-47d2-b2be-71e87827f90a",
"metadata": {},
"source": [
"## OpenAI\n",
"\n",
"For OpenAI, we can feed the image URL directly in a content block of type \"image_url\":"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a8819cf3-5ddc-44f0-889a-19ca7b7fe77e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'name': 'weather_tool', 'args': {'weather': 'sunny'}, 'id': 'call_mRYL50MtHdeNuNIjSCm5UPmB'}]\n"
]
}
],
"source": [
"from langchain_core.messages import HumanMessage\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(model=\"gpt-4o\").bind_tools([weather_tool])\n",
"\n",
"message = HumanMessage(\n",
" content=[\n",
" {\"type\": \"text\", \"text\": \"describe the weather in this image\"},\n",
" {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}},\n",
" ],\n",
")\n",
"response = model.invoke([message])\n",
"print(response.tool_calls)"
]
},
{
"cell_type": "markdown",
"id": "e5738224-1109-4bf8-8976-ff1570dd1d46",
"metadata": {},
"source": [
"Note that we recover tool calls with parsed arguments in LangChain's [standard format](/docs/how_to/tool_calling) in the model response."
]
},
{
"cell_type": "markdown",
"id": "0cee63ff-e09f-4dd8-8323-912edbde94f6",
"metadata": {},
"source": [
"## Anthropic\n",
"\n",
"For Anthropic, we can format a base64-encoded image into a content block of type \"image\", as below:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d90c4590-71c8-42b1-99ff-03a9eca8082e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'name': 'weather_tool', 'args': {'weather': 'sunny'}, 'id': 'toolu_016m9KfknJqx5fVRYk4tkF6s'}]\n"
]
}
],
"source": [
"import base64\n",
"\n",
"import httpx\n",
"from langchain_anthropic import ChatAnthropic\n",
"\n",
"image_data = base64.b64encode(httpx.get(image_url).content).decode(\"utf-8\")\n",
"\n",
"model = ChatAnthropic(model=\"claude-3-sonnet-20240229\").bind_tools([weather_tool])\n",
"\n",
"message = HumanMessage(\n",
" content=[\n",
" {\"type\": \"text\", \"text\": \"describe the weather in this image\"},\n",
" {\n",
" \"type\": \"image\",\n",
" \"source\": {\n",
" \"type\": \"base64\",\n",
" \"media_type\": \"image/jpeg\",\n",
" \"data\": image_data,\n",
" },\n",
" },\n",
" ],\n",
")\n",
"response = model.invoke([message])\n",
"print(response.tool_calls)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -31,7 +31,7 @@
"\n",
"The simpler the input to a tool is, the easier it is for an LLM to be able to use it.\n",
"Many agents will only work with tools that have a single string input.\n",
"For a list of agent types and which ones work with more complicated inputs, please see [this documentation](../agents/agent_types)\n",
"For a list of agent types and which ones work with more complicated inputs, please see [this documentation](https://python.langchain.com/v0.1/docs/modules/agents/agent_types/)\n",
"\n",
"Importantly, the name, description, and JSON schema (if used) are all used in the prompt. Therefore, it is really important that they are clear and describe exactly how the tool should be used. You may need to change the default name, description, or JSON schema if the LLM is not understanding how to use the tool.\n",
"\n",
@@ -409,11 +409,11 @@
"\n",
"**[Built-In Tools](/docs/integrations/tools/)**: For a list of all built-in tools, see [this page](/docs/integrations/tools/)\n",
" \n",
"**[Custom Tools](./custom_tools)**: Although built-in tools are useful, it's highly likely that you'll have to define your own tools. See [this guide](./custom_tools) for instructions on how to do so.\n",
"**[Custom Tools](/docs/how_to/custom_tools)**: Although built-in tools are useful, it's highly likely that you'll have to define your own tools. See [this guide](/docs/how_to/custom_tools) for instructions on how to do so.\n",
" \n",
"**[Toolkits](./toolkits)**: Toolkits are collections of tools that work well together. For a more in depth description as well as a list of all built-in toolkits, see [this page](./toolkits)\n",
"**[Toolkits](/docs/how_to/toolkits)**: Toolkits are collections of tools that work well together. For a more in depth description as well as a list of all built-in toolkits, see [this page](/docs/how_to/toolkits)\n",
"\n",
"**[Tools as OpenAI Functions](./tools_as_openai_functions)**: Tools are very similar to OpenAI Functions, and can easily be converted to that format. See [this notebook](./tools_as_openai_functions) for instructions on how to do that.\n",
"**[Tools as OpenAI Functions](/docs/how_to/tools_as_openai_functions/)**: Tools are very similar to OpenAI Functions, and can easily be converted to that format. See [this notebook](/docs/how_to/tools_as_openai_functions) for instructions on how to do that.\n",
"\n"
]
},

View File

@@ -278,7 +278,7 @@
"\n",
"Chains are great when we know the specific sequence of tool usage needed for any user input. But for certain use cases, how many times we use tools depends on the input. In these cases, we want to let the model itself decide how many times to use tools and in what order. [Agents](/docs/tutorials/agents) let us do just this.\n",
"\n",
"LangChain comes with a number of built-in agents that are optimized for different use cases. Read about all the [agent types here](/docs/modules/agents/agent_types/).\n",
"LangChain comes with a number of built-in agents that are optimized for different use cases. Read about all the [agent types here](/docs/concepts#agents).\n",
"\n",
"We'll use the [tool calling agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.tool_calling_agent.base.create_tool_calling_agent.html), which is generally the most reliable kind and the recommended one for most use cases.\n",
"\n",
@@ -335,7 +335,7 @@
"id": "616f9714-5b18-4eed-b88a-d38e4cb1de99",
"metadata": {},
"source": [
"Agents are also great because they make it easy to use multiple tools. To learn how to build Chains that use multiple tools, check out the [Chains with multiple tools](/docs/use_cases/tool_use/multiple_tools) page."
"Agents are also great because they make it easy to use multiple tools. To learn how to build Chains that use multiple tools, check out the [Chains with multiple tools](/docs/how_to/tools_multiple) page."
]
},
{
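The cell above points at `create_tool_calling_agent`; the following is a rough, self-contained sketch of that setup. The hub prompt id, tool, and model are assumed placeholders rather than the notebook's own values.

```python
# Sketch: a tool-calling agent that decides how often to call the tool.
# Assumes langchain, langchainhub, and langchain-openai are installed and
# OPENAI_API_KEY is set; the prompt id and tool are illustrative.
from langchain import hub
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


prompt = hub.pull("hwchase17/openai-tools-agent")  # a generic tools-agent prompt
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

agent = create_tool_calling_agent(llm, [multiply], prompt)
agent_executor = AgentExecutor(agent=agent, tools=[multiply], verbose=True)

agent_executor.invoke({"input": "What is 3 times 12, and that result times 5?"})
```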

View File

@@ -17,7 +17,7 @@
"source": [
"# How to use an LLM to choose between multiple tools\n",
"\n",
"In our [Quickstart](/docs/use_cases/tool_use/quickstart) we went over how to build a Chain that calls a single `multiply` tool. Now let's take a look at how we might augment this chain so that it can pick from a number of tools to call. We'll focus on Chains since [Agents](/docs/tutorials/agents) can route between multiple tools by default."
"In our [Quickstart](/docs/how_to/tool_calling) we went over how to build a Chain that calls a single `multiply` tool. Now let's take a look at how we might augment this chain so that it can pick from a number of tools to call. We'll focus on Chains since [Agents](/docs/tutorials/agents) can route between multiple tools by default."
]
},
{
@@ -120,7 +120,7 @@
"id": "bbea4555-ed10-4a18-b802-e9a3071f132b",
"metadata": {},
"source": [
"The main difference between using one Tool and many is that we can't be sure which Tool the model will invoke upfront, so we cannot hardcode, like we did in the [Quickstart](/docs/use_cases/tool_use/quickstart), a specific tool into our chain. Instead we'll add `call_tools`, a `RunnableLambda` that takes the output AI message with tools calls and routes to the correct tools.\n",
"The main difference between using one Tool and many is that we can't be sure which Tool the model will invoke upfront, so we cannot hardcode, like we did in the [Quickstart](/docs/how_to/tool_calling), a specific tool into our chain. Instead we'll add `call_tools`, a `RunnableLambda` that takes the output AI message with tools calls and routes to the correct tools.\n",
"\n",
"```{=mdx}\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",

View File

@@ -7,7 +7,7 @@
"source": [
"# How to call tools in parallel\n",
"\n",
"In the [Chains with multiple tools](/docs/use_cases/tool_use/multiple_tools) guide we saw how to build function-calling chains that select between multiple tools. Some models, like the OpenAI models released in Fall 2023, also support parallel function calling, which allows you to invoke multiple functions (or the same function multiple times) in a single model call. Our previous chain from the multiple tools guides actually already supports this."
"In the [Chains with multiple tools](/docs/how_to/tools_multiple) guide we saw how to build function-calling chains that select between multiple tools. Some models, like the OpenAI models released in Fall 2023, also support parallel function calling, which allows you to invoke multiple functions (or the same function multiple times) in a single model call. Our previous chain from the multiple tools guides actually already supports this."
]
},
{

View File

@@ -17,7 +17,7 @@
"source": [
"# How to use tools without function calling\n",
"\n",
"In this guide we'll build a Chain that does not rely on any special model APIs (like tool calling, which we showed in the [Quickstart](/docs/use_cases/tool_use/quickstart)) and instead just prompts the model directly to invoke tools."
"In this guide we'll build a Chain that does not rely on any special model APIs (like tool calling, which we showed in the [Quickstart](/docs/how_to/tool_calling)) and instead just prompts the model directly to invoke tools."
]
},
{

View File

@@ -20,57 +20,74 @@
"A vector store retriever is a retriever that uses a vector store to retrieve documents. It is a lightweight wrapper around the vector store class to make it conform to the retriever interface.\n",
"It uses the search methods implemented by a vector store, like similarity search and MMR, to query the texts in the vector store.\n",
"\n",
"Once you construct a vector store, it's very easy to construct a retriever. Let's walk through an example.\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "103dbfe3",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import TextLoader\n",
"In this guide we will cover:\n",
"\n",
"loader = TextLoader(\"../../state_of_the_union.txt\")"
"1. How to instantiate a retriever from a vectorstore;\n",
"2. How to specify the search type for the retriever;\n",
"3. How to specify additional search parameters, such as threshold scores and top-k.\n",
"\n",
"## Creating a retriever from a vectorstore\n",
"\n",
"You can build a retriever from a vectorstore using its [.as_retriever](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.as_retriever) method. Let's walk through an example.\n",
"\n",
"First we instantiate a vectorstore. We will use an in-memory [FAISS](https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.faiss.FAISS.html) vectorstore:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 1,
"id": "174e3c69",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"loader = TextLoader(\"state_of_the_union.txt\")\n",
"\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(documents)\n",
"embeddings = OpenAIEmbeddings()\n",
"db = FAISS.from_documents(texts, embeddings)"
"vectorstore = FAISS.from_documents(texts, embeddings)"
]
},
{
"cell_type": "markdown",
"id": "6f6e65a1-5eb4-4165-b06b-9bb40624a8d8",
"metadata": {},
"source": [
"We can then instantiate a retriever:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 2,
"id": "52df5f55",
"metadata": {},
"outputs": [],
"source": [
"retriever = db.as_retriever()"
"retriever = vectorstore.as_retriever()"
]
},
{
"cell_type": "markdown",
"id": "08f8b820-5912-49c1-9d76-40be0571dffb",
"metadata": {},
"source": [
"This creates a retriever (specifically a [VectorStoreRetriever](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStoreRetriever.html)), which we can use in the usual way:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 3,
"id": "32334fda",
"metadata": {},
"outputs": [],
"source": [
"docs = retriever.get_relevant_documents(\"what did he say about ketanji brown jackson\")"
"docs = retriever.invoke(\"what did the president say about ketanji brown jackson?\")"
]
},
{
@@ -80,27 +97,28 @@
"source": [
"## Maximum marginal relevance retrieval\n",
"By default, the vector store retriever uses similarity search. If the underlying vector store supports maximum marginal relevance search, you can specify that as the search type.\n",
"\n"
"\n",
"This effectively specifies what method on the underlying vectorstore is used (e.g., `similarity_search`, `max_marginal_relevance_search`, etc.)."
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"id": "b286ac04",
"metadata": {},
"outputs": [],
"source": [
"retriever = db.as_retriever(search_type=\"mmr\")"
"retriever = vectorstore.as_retriever(search_type=\"mmr\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"id": "07f937f7",
"metadata": {},
"outputs": [],
"source": [
"docs = retriever.get_relevant_documents(\"what did he say about ketanji brown jackson\")"
"docs = retriever.invoke(\"what did the president say about ketanji brown jackson?\")"
]
},
{
@@ -108,32 +126,35 @@
"id": "6ce77789",
"metadata": {},
"source": [
"## Passing search parameters\n",
"\n",
"## Similarity score threshold retrieval\n",
"We can pass parameters to the underlying vectorstore's search methods using `search_kwargs`.\n",
"\n",
"You can also set a retrieval method that sets a similarity score threshold and only returns documents with a score above that threshold."
"### Similarity score threshold retrieval\n",
"\n",
"For example, we can set a similarity score threshold and only return documents with a score above that threshold."
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 6,
"id": "dbb38a03",
"metadata": {},
"outputs": [],
"source": [
"retriever = db.as_retriever(\n",
"retriever = vectorstore.as_retriever(\n",
" search_type=\"similarity_score_threshold\", search_kwargs={\"score_threshold\": 0.5}\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 7,
"id": "56f6c9ae",
"metadata": {},
"outputs": [],
"source": [
"docs = retriever.get_relevant_documents(\"what did he say about ketanji brown jackson\")"
"docs = retriever.invoke(\"what did the president say about ketanji brown jackson?\")"
]
},
{
@@ -141,24 +162,24 @@
"id": "329f5b26",
"metadata": {},
"source": [
"### Specifying top k\n",
"\n",
"## Specifying top k\n",
"You can also specify search kwargs like `k` to use when doing retrieval.\n"
"We can also limit the number of documents `k` returned by the retriever."
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 8,
"id": "d712c91d",
"metadata": {},
"outputs": [],
"source": [
"retriever = db.as_retriever(search_kwargs={\"k\": 1})"
"retriever = vectorstore.as_retriever(search_kwargs={\"k\": 1})"
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 9,
"id": "a79b573b",
"metadata": {},
"outputs": [
@@ -168,23 +189,15 @@
"1"
]
},
"execution_count": 13,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = retriever.get_relevant_documents(\"what did he say about ketanji brown jackson\")\n",
"docs = retriever.invoke(\"what did the president say about ketanji brown jackson?\")\n",
"len(docs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d3b34eb",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -203,7 +216,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
"version": "3.10.4"
}
},
"nbformat": 4,

View File

@@ -124,7 +124,7 @@
"tags": []
},
"source": [
"Here are two examples of how to use the `TrubricsCallbackHandler` with Langchain [LLMs](/docs/modules/model_io/llms/) or [Chat Models](/docs/modules/model_io/chat/). We will use OpenAI models, so set your `OPENAI_API_KEY` key here:"
"Here are two examples of how to use the `TrubricsCallbackHandler` with Langchain [LLMs](/docs/how_to#llms) or [Chat Models](/docs/how_to#chat-models). We will use OpenAI models, so set your `OPENAI_API_KEY` key here:"
]
},
{

View File

@@ -135,7 +135,7 @@
"metadata": {},
"outputs": [],
"source": [
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
"documents = loader.load()"
]
},

View File

@@ -77,7 +77,7 @@
"source": [
"## Usage\n",
"\n",
"ChatCohere supports all [ChatModel](/docs/modules/model_io/chat/) functionality:"
"ChatCohere supports all [ChatModel](/docs/how_to#chat-models) functionality:"
]
},
{
@@ -201,7 +201,7 @@
"source": [
"## Chaining\n",
"\n",
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/expression_language)"
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/concepts#langchain-expression-language)"
]
},
{
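As a hedged illustration of the chaining sentence above, a minimal LCEL sketch; the prompt is an assumption, and `ChatCohere` is taken here from the `langchain-cohere` package with a `COHERE_API_KEY` assumed to be set.

```python
# Sketch: chain a prompt template into ChatCohere with LCEL.
# Assumes langchain-cohere is installed and COHERE_API_KEY is set.
from langchain_cohere import ChatCohere
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("Tell me a joke about {topic}")
chain = prompt | ChatCohere()

print(chain.invoke({"topic": "bears"}).content)
```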

View File

@@ -71,7 +71,7 @@
"source": [
"## Usage\n",
"\n",
"`FrienliChat` supports all methods of [`ChatModel`](/docs/modules/model_io/chat/) including async APIs."
"`FrienliChat` supports all methods of [`ChatModel`](/docs/how_to#chat-models) including async APIs."
]
},
{

View File

@@ -509,7 +509,7 @@
"source": [
"## Asynchronous calls\n",
"\n",
"We can make asynchronous calls via the Runnables [Async Interface](/docs/expression_language/interface)."
"We can make asynchronous calls via the Runnables [Async Interface](/docs/concepts#interface)."
]
},
{

View File

@@ -9,9 +9,10 @@
"This notebook shows how to get started using `Hugging Face` LLM's as chat models.\n",
"\n",
"In particular, we will:\n",
"1. Utilize the [HuggingFaceTextGenInference](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_text_gen_inference.py), [HuggingFaceEndpoint](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_endpoint.py), or [HuggingFaceHub](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_hub.py) integrations to instantiate an `LLM`.\n",
"2. Utilize the `ChatHuggingFace` class to enable any of these LLMs to interface with LangChain's [Chat Messages](/docs/modules/model_io/chat/#messages) abstraction.\n",
"3. Demonstrate how to use an open-source LLM to power an `ChatAgent` pipeline\n",
"1. Utilize the [HuggingFaceEndpoint](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_endpoint.py) integrations to instantiate an `LLM`.\n",
"2. Utilize the `ChatHuggingFace` class to enable any of these LLMs to interface with LangChain's [Chat Messages](/docs/concepts/#message-types) abstraction.\n",
"3. Explore tool calling with the `ChatHuggingFace`.\n",
"4. Demonstrate how to use an open-source LLM to power an `ChatAgent` pipeline\n",
"\n",
"\n",
"> Note: To get started, you'll need to have a [Hugging Face Access Token](https://huggingface.co/docs/hub/security-tokens) saved as an environment variable: `HUGGINGFACEHUB_API_TOKEN`."
@@ -21,61 +22,16 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade --quiet text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Instantiate an LLM\n",
"\n",
"There are three LLM options to choose from."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `HuggingFaceTextGenInference`"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from langchain_community.llms import HuggingFaceTextGenInference\n",
"\n",
"ENDPOINT_URL = \"<YOUR_ENDPOINT_URL_HERE>\"\n",
"HF_TOKEN = os.getenv(\"HUGGINGFACEHUB_API_TOKEN\")\n",
"\n",
"llm = HuggingFaceTextGenInference(\n",
" inference_server_url=ENDPOINT_URL,\n",
" max_new_tokens=512,\n",
" top_k=50,\n",
" temperature=0.1,\n",
" repetition_penalty=1.03,\n",
" server_kwargs={\n",
" \"headers\": {\n",
" \"Authorization\": f\"Bearer {HF_TOKEN}\",\n",
" \"Content-Type\": \"application/json\",\n",
" }\n",
" },\n",
")"
"%pip install --upgrade --quiet langchain-huggingface text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Instantiate an LLM"
]
},
{
@@ -87,58 +43,18 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.llms import HuggingFaceEndpoint\n",
"from langchain_huggingface import HuggingFaceEndpoint\n",
"\n",
"ENDPOINT_URL = \"<YOUR_ENDPOINT_URL_HERE>\"\n",
"llm = HuggingFaceEndpoint(\n",
" endpoint_url=ENDPOINT_URL,\n",
" repo_id=\"meta-llama/Meta-Llama-3-70B-Instruct\",\n",
" task=\"text-generation\",\n",
" model_kwargs={\n",
" \"max_new_tokens\": 512,\n",
" \"top_k\": 50,\n",
" \"temperature\": 0.1,\n",
" \"repetition_penalty\": 1.03,\n",
" },\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `HuggingFaceHub`"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/jacoblee/langchain/langchain/libs/langchain/.venv/lib/python3.10/site-packages/huggingface_hub/utils/_deprecation.py:127: FutureWarning: '__init__' (from 'huggingface_hub.inference_api') is deprecated and will be removed from version '1.0'. `InferenceApi` client is deprecated in favor of the more feature-complete `InferenceClient`. Check out this guide to learn how to convert your script to use it: https://huggingface.co/docs/huggingface_hub/guides/inference#legacy-inferenceapi-client.\n",
" warnings.warn(warning_message, FutureWarning)\n"
]
}
],
"source": [
"from langchain_community.llms import HuggingFaceHub\n",
"\n",
"llm = HuggingFaceHub(\n",
" repo_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
" task=\"text-generation\",\n",
" model_kwargs={\n",
" \"max_new_tokens\": 512,\n",
" \"top_k\": 30,\n",
" \"temperature\": 0.1,\n",
" \"repetition_penalty\": 1.03,\n",
" },\n",
" max_new_tokens=512,\n",
" do_sample=False,\n",
" repetition_penalty=1.03,\n",
")"
]
},
@@ -153,37 +69,30 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Instantiate the chat model and some messages to pass."
"Instantiate the chat model and some messages to pass. \n",
"\n",
"**Note**: you need to pass the `model_id` explicitly if you are using self-hosted `text-generation-inference`"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING! repo_id is not default parameter.\n",
" repo_id was transferred to model_kwargs.\n",
" Please confirm that repo_id is what you intended.\n",
"WARNING! task is not default parameter.\n",
" task was transferred to model_kwargs.\n",
" Please confirm that task is what you intended.\n",
"WARNING! huggingfacehub_api_token is not default parameter.\n",
" huggingfacehub_api_token was transferred to model_kwargs.\n",
" Please confirm that huggingfacehub_api_token is what you intended.\n",
"None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n"
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
]
}
],
"source": [
"from langchain.schema import (\n",
"from langchain_core.messages import (\n",
" HumanMessage,\n",
" SystemMessage,\n",
")\n",
"from langchain_community.chat_models.huggingface import ChatHuggingFace\n",
"from langchain_huggingface import ChatHuggingFace\n",
"\n",
"messages = [\n",
" SystemMessage(content=\"You're a helpful assistant\"),\n",
@@ -199,21 +108,21 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Inspect which model and corresponding chat template is being used."
"Check the `model_id`"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'HuggingFaceH4/zephyr-7b-beta'"
"'meta-llama/Meta-Llama-3-70B-Instruct'"
]
},
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -231,16 +140,16 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"<|system|>\\nYou're a helpful assistant</s>\\n<|user|>\\nWhat happens when an unstoppable force meets an immovable object?</s>\\n<|assistant|>\\n\""
"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nYou're a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\nWhat happens when an unstoppable force meets an immovable object?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n\""
]
},
"execution_count": 7,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -258,14 +167,20 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"According to a popular philosophical paradox, when an unstoppable force meets an immovable object, it is impossible to determine which one will prevail because both are defined as being completely unyielding and unmovable. The paradox suggests that the very concepts of \"unstoppable force\" and \"immovable object\" are inherently contradictory, and therefore, it is illogical to imagine a scenario where they would meet and interact. However, in practical terms, it is highly unlikely for such a scenario to occur in the real world, as the concepts of \"unstoppable force\" and \"immovable object\" are often used metaphorically to describe hypothetical situations or abstract concepts, rather than physical objects or forces.\n"
"One of the classic thought experiments in physics!\n",
"\n",
"The concept of an unstoppable force meeting an immovable object is a paradox that has puzzled philosophers and physicists for centuries. It's a mind-bending scenario that challenges our understanding of the fundamental laws of physics.\n",
"\n",
"In essence, an unstoppable force is something that cannot be halted or slowed down, while an immovable object is something that cannot be moved or displaced. If we assume that both entities exist in the same universe, we run into a logical contradiction.\n",
"\n",
"Here\n"
]
}
],
@@ -278,16 +193,80 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Take it for a spin as an agent!\n",
"## 3. Explore the tool calling with `ChatHuggingFace`\n",
"\n",
"Here we'll test out `Zephyr-7B-beta` as a zero-shot `ReAct` Agent. The example below is taken from [here](/docs/modules/agents/agent_types/react#using-chat-models).\n",
"`text-generation-inference` supports tool with open source LLMs starting from v2.0.1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a basic tool (`Calculator`):"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"\n",
"\n",
"class Calculator(BaseModel):\n",
" \"\"\"Multiply two integers together.\"\"\"\n",
"\n",
" a: int = Field(..., description=\"First integer\")\n",
" b: int = Field(..., description=\"Second integer\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Bind the tool to the `chat_model` and give it a try:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Calculator(a=3, b=12)]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.output_parsers.openai_tools import PydanticToolsParser\n",
"\n",
"llm_with_multiply = chat_model.bind_tools([Calculator], tool_choice=\"auto\")\n",
"parser = PydanticToolsParser(tools=[Calculator])\n",
"tool_chain = llm_with_multiply | parser\n",
"tool_chain.invoke(\"How much is 3 multiplied by 12?\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Take it for a spin as an agent!\n",
"\n",
"Here we'll test out `Zephyr-7B-beta` as a zero-shot `ReAct` Agent. The example below is taken from [here](https://python.langchain.com/v0.1/docs/modules/agents/agent_types/react/#using-chat-models).\n",
"\n",
"> Note: To run this section, you'll need to have a [SerpAPI Token](https://serpapi.com/) saved as an environment variable: `SERPAPI_API_KEY`"
]
},
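The agent cells themselves are truncated in this diff. Roughly, they follow the standard ReAct recipe; a hedged sketch (assuming `SERPAPI_API_KEY` is set, reusing `chat_model` from above, and using an illustrative question):

```python
# Rough ReAct-agent sketch over SerpAPI; the notebook's actual cells differ in detail.
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent, load_tools

tools = load_tools(["serpapi", "llm-math"], llm=chat_model)
prompt = hub.pull("hwchase17/react")  # generic ReAct prompt from the LangChain hub
agent = create_react_agent(chat_model, tools, prompt)
agent_executor = AgentExecutor(
    agent=agent, tools=tools, verbose=True, handle_parsing_errors=True
)
agent_executor.invoke({"input": "Who won the FIFA World Cup in 2022?"})
```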
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -310,7 +289,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -342,7 +321,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [
{

View File

@@ -17,9 +17,9 @@
"source": [
"# Llama2Chat\n",
"\n",
"This notebook shows how to augment Llama-2 `LLM`s with the `Llama2Chat` wrapper to support the [Llama-2 chat prompt format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). Several `LLM` implementations in LangChain can be used as interface to Llama-2 chat models. These include [ChatHuggingFace](/docs/integrations/chat/huggingface), [LlamaCpp](/docs/use_cases/question_answering/local_retrieval_qa), [GPT4All](/docs/integrations/llms/gpt4all), ..., to mention a few examples. \n",
"This notebook shows how to augment Llama-2 `LLM`s with the `Llama2Chat` wrapper to support the [Llama-2 chat prompt format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). Several `LLM` implementations in LangChain can be used as interface to Llama-2 chat models. These include [ChatHuggingFace](/docs/integrations/chat/huggingface), [LlamaCpp](/docs/tutorials/local_rag), [GPT4All](/docs/integrations/llms/gpt4all), ..., to mention a few examples. \n",
"\n",
"`Llama2Chat` is a generic wrapper that implements `BaseChatModel` and can therefore be used in applications as [chat model](/docs/modules/model_io/chat/). `Llama2Chat` converts a list of Messages into the [required chat prompt format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2) and forwards the formatted prompt as `str` to the wrapped `LLM`."
"`Llama2Chat` is a generic wrapper that implements `BaseChatModel` and can therefore be used in applications as [chat model](/docs/how_to#chat-models). `Llama2Chat` converts a list of Messages into the [required chat prompt format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2) and forwards the formatted prompt as `str` to the wrapped `LLM`."
]
},
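To make the wrapping concrete, here is a sketch with an assumed local TGI endpoint (the URL is hypothetical; any of the LLM wrappers listed above would work the same way):

```python
from langchain_community.llms import HuggingFaceTextGenInference
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_experimental.chat_models import Llama2Chat

# Hypothetical TGI server hosting a Llama-2 chat checkpoint.
llm = HuggingFaceTextGenInference(
    inference_server_url="http://127.0.0.1:8080/", max_new_tokens=512
)
model = Llama2Chat(llm=llm)  # formats Messages into the Llama-2 chat prompt for `llm`

print(
    model.invoke(
        [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content="What can I see in Vienna? Propose a few locations."),
        ]
    )
)
```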
{

View File

@@ -225,7 +225,7 @@
"source": [
"## Chaining\n",
"\n",
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/expression_language)"
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/concepts#langchain-expression-language)"
]
},
{

File diff suppressed because one or more lines are too long

View File

@@ -185,7 +185,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Take a look at the [LangChain Expressive Language (LCEL) Interface](/docs/expression_language/interface) for the other available interfaces for use when a chain is created.\n",
"Take a look at the [LangChain Expressive Language (LCEL) Interface](/docs/concepts#interface) for the other available interfaces for use when a chain is created.\n",
"\n",
"## Building from source\n",
"\n",

View File

@@ -8,7 +8,7 @@
"\n",
"> [Bigtable](https://cloud.google.com/bigtable) is a key-value and wide-column store, ideal for fast access to structured, semi-structured, or unstructured data. Extend your database application to build AI-powered experiences leveraging Bigtable's Langchain integrations.\n",
"\n",
"This notebook goes over how to use [Bigtable](https://cloud.google.com/bigtable) to [save, load and delete langchain documents](/docs/modules/data_connection/document_loaders/) with `BigtableLoader` and `BigtableSaver`.\n",
"This notebook goes over how to use [Bigtable](https://cloud.google.com/bigtable) to [save, load and delete langchain documents](/docs/how_to#document-loaders) with `BigtableLoader` and `BigtableSaver`.\n",
"\n",
"Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-bigtable-python/).\n",
"\n",

View File

@@ -8,7 +8,7 @@
"\n",
"> [Cloud SQL](https://cloud.google.com/sql) is a fully managed relational database service that offers high performance, seamless integration, and impressive scalability. It offers [MySQL](https://cloud.google.com/sql/mysql), [PostgreSQL](https://cloud.google.com/sql/postgres), and [SQL Server](https://cloud.google.com/sql/sqlserver) database engines. Extend your database application to build AI-powered experiences leveraging Cloud SQL's Langchain integrations.\n",
"\n",
"This notebook goes over how to use [Cloud SQL for SQL server](https://cloud.google.com/sql/sqlserver) to [save, load and delete langchain documents](/docs/modules/data_connection/document_loaders/) with `MSSQLLoader` and `MSSQLDocumentSaver`.\n",
"This notebook goes over how to use [Cloud SQL for SQL server](https://cloud.google.com/sql/sqlserver) to [save, load and delete langchain documents](/docs/how_to#document-loaders) with `MSSQLLoader` and `MSSQLDocumentSaver`.\n",
"\n",
"Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-cloud-sql-mssql-python/).\n",
"\n",

View File

@@ -8,7 +8,7 @@
"\n",
"> [Cloud SQL](https://cloud.google.com/sql) is a fully managed relational database service that offers high performance, seamless integration, and impressive scalability. It offers [MySQL](https://cloud.google.com/sql/mysql), [PostgreSQL](https://cloud.google.com/sql/postgresql), and [SQL Server](https://cloud.google.com/sql/sqlserver) database engines. Extend your database application to build AI-powered experiences leveraging Cloud SQL's Langchain integrations.\n",
"\n",
"This notebook goes over how to use [Cloud SQL for MySQL](https://cloud.google.com/sql/mysql) to [save, load and delete langchain documents](/docs/modules/data_connection/document_loaders/) with `MySQLLoader` and `MySQLDocumentSaver`.\n",
"This notebook goes over how to use [Cloud SQL for MySQL](https://cloud.google.com/sql/mysql) to [save, load and delete langchain documents](/docs/how_to#document-loaders) with `MySQLLoader` and `MySQLDocumentSaver`.\n",
"\n",
"Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-cloud-sql-mysql-python/).\n",
"\n",

View File

@@ -8,7 +8,7 @@
"\n",
"> [Firestore in Datastore Mode](https://cloud.google.com/datastore) is a NoSQL document database built for automatic scaling, high performance and ease of application development. Extend your database application to build AI-powered experiences leveraging Datastore's Langchain integrations.\n",
"\n",
"This notebook goes over how to use [Firestore in Datastore Mode](https://cloud.google.com/datastore) to [save, load and delete langchain documents](/docs/modules/data_connection/document_loaders/) with `DatastoreLoader` and `DatastoreSaver`.\n",
"This notebook goes over how to use [Firestore in Datastore Mode](https://cloud.google.com/datastore) to [save, load and delete langchain documents](/docs/how_to#document-loaders) with `DatastoreLoader` and `DatastoreSaver`.\n",
"\n",
"Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-datastore-python/).\n",
"\n",

View File

@@ -18,7 +18,7 @@
"by leveraging the El Carro Langchain integration.\n",
"\n",
"This guide goes over how to use El Carro Langchain integration to\n",
"[save, load and delete langchain documents](/docs/modules/data_connection/document_loaders/)\n",
"[save, load and delete langchain documents](/docs/how_to#document-loaders)\n",
"with `ElCarroLoader` and `ElCarroDocumentSaver`. This integration works for any Oracle database, regardless of where it is running.\n",
"\n",
"Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-el-carro-python/).\n",

View File

@@ -8,7 +8,7 @@
"\n",
"> [Firestore](https://cloud.google.com/firestore) is a serverless document-oriented database that scales to meet any demand. Extend your database application to build AI-powered experiences leveraging Firestore's Langchain integrations.\n",
"\n",
"This notebook goes over how to use [Firestore](https://cloud.google.com/firestore) to [save, load and delete langchain documents](/docs/modules/data_connection/document_loaders/) with `FirestoreLoader` and `FirestoreSaver`.\n",
"This notebook goes over how to use [Firestore](https://cloud.google.com/firestore) to [save, load and delete langchain documents](/docs/how_to#document-loaders) with `FirestoreLoader` and `FirestoreSaver`.\n",
"\n",
"Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-firestore-python/).\n",
"\n",

View File

@@ -10,7 +10,7 @@
"\n",
"> [Google Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis/memorystore-for-redis-overview) is a fully-managed service that is powered by the Redis in-memory data store to build application caches that provide sub-millisecond data access. Extend your database application to build AI-powered experiences leveraging Memorystore for Redis's Langchain integrations.\n",
"\n",
"This notebook goes over how to use [Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis/memorystore-for-redis-overview) to [save, load and delete langchain documents](/docs/modules/data_connection/document_loaders/) with `MemorystoreDocumentLoader` and `MemorystoreDocumentSaver`.\n",
"This notebook goes over how to use [Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis/memorystore-for-redis-overview) to [save, load and delete langchain documents](/docs/how_to#document-loaders) with `MemorystoreDocumentLoader` and `MemorystoreDocumentSaver`.\n",
"\n",
"Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-memorystore-redis-python/).\n",
"\n",

View File

@@ -8,7 +8,7 @@
"\n",
"> [Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution.\n",
"\n",
"This notebook goes over how to use [Spanner](https://cloud.google.com/spanner) to [save, load and delete langchain documents](/docs/modules/data_connection/document_loaders/) with `SpannerLoader` and `SpannerDocumentSaver`.\n",
"This notebook goes over how to use [Spanner](https://cloud.google.com/spanner) to [save, load and delete langchain documents](/docs/how_to#document-loaders) with `SpannerLoader` and `SpannerDocumentSaver`.\n",
"\n",
"Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-spanner-python/).\n",
"\n",

View File

@@ -143,7 +143,7 @@
"data": {
"text/plain": [
"{'id': 'documents:zzz434sa584xl3b4ohvk',\n",
" 'source': '../../modules/state_of_the_union.txt',\n",
" 'source': '../../how_to/state_of_the_union.txt',\n",
" 'ns': 'langchain',\n",
" 'db': 'database',\n",
" 'table': 'documents'}"

View File

@@ -99,9 +99,9 @@
"\n",
"## Get started [](\\#get-started \"Direct link to Get started\")\n",
"\n",
"[Heres](/docs/get_started/installation) how to install LangChain, set up your environment, and start building.\n",
"[Heres](/docs/installation) how to install LangChain, set up your environment, and start building.\n",
"\n",
"We recommend following our [Quickstart](/docs/get_started/quickstart) guide to familiarize yourself with the framework by building your first LangChain application.\n",
"We recommend following our [Quickstart](/docs/tutorials/llm_chain) guide to familiarize yourself with the framework by building your first LangChain application.\n",
"\n",
"Read up on our [Security](/docs/security) best practices to make sure you're developing safely with LangChain.\n",
"\n",
@@ -113,8 +113,8 @@
"\n",
"LCEL is a declarative way to compose chains. LCEL was designed from day 1 to support putting prototypes in production, with no code changes, from the simplest “prompt + LLM” chain to the most complex chains.\n",
"\n",
"- **[Overview](/docs/expression_language/)**: LCEL and its benefits\n",
"- **[Interface](/docs/expression_language/interface)**: The standard interface for LCEL objects\n",
"- **[Overview](/docs/concepts#langchain-expression-language)**: LCEL and its benefits\n",
"- **[Interface](/docs/concepts#interface)**: The standard interface for LCEL objects\n",
"- **[How-to](/docs/expression_language/how_to)**: Key features of LCEL\n",
"- **[Cookbook](/docs/expression_language/cookbook)**: Example code for accomplishing common tasks\n",
"\n",
@@ -136,13 +136,13 @@
"\n",
"## Examples, ecosystem, and resources [](\\#examples-ecosystem-and-resources \"Direct link to Examples, ecosystem, and resources\")\n",
"\n",
"### [Use cases](/docs/use_cases/question_answering/) [](\\#use-cases \"Direct link to use-cases\")\n",
"### [Use cases](/docs/how_to#qa-with-rag) [](\\#use-cases \"Direct link to use-cases\")\n",
"\n",
"Walkthroughs and techniques for common end-to-end use cases, like:\n",
"\n",
"- [Document question answering](/docs/use_cases/question_answering/)\n",
"- [Document question answering](/docs/how_to#qa-with-rag)\n",
"- [Chatbots](/docs/use_cases/chatbots/)\n",
"- [Analyzing structured data](/docs/use_cases/sql/)\n",
"- [Analyzing structured data](/docs/how_to#qa-over-sql--csv)\n",
"- and much more...\n",
"\n",
"### [Integrations](/docs/integrations/providers/) [](\\#integrations \"Direct link to integrations\")\n",

View File

@@ -67,11 +67,11 @@
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
"texts = text_splitter.split_documents(documents)\n",
"embeddingsModel = HuggingFaceEmbeddings(\n",

View File

@@ -101,7 +101,7 @@
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"documents = TextLoader(\n",
" \"../../modules/state_of_the_union.txt\",\n",
" \"../../how_to/state_of_the_union.txt\",\n",
").load()\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
"texts = text_splitter.split_documents(documents)\n",

View File

@@ -119,7 +119,7 @@
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 73}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 73}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 2:\n",
"\n",
@@ -134,7 +134,7 @@
"The VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits. \n",
"\n",
"And tonight, Im announcing were expanding eligibility to veterans suffering from nine respiratory cancers.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 88}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 88}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 3:\n",
"\n",
@@ -149,7 +149,7 @@
"But cancer from prolonged exposure to burn pits ravaged Heaths lungs and body. \n",
"\n",
"Danielle says Heath was a fighter to the very end.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 87}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 87}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 4:\n",
"\n",
@@ -160,7 +160,7 @@
"This is personal to me and Jill, to Kamala, and to so many of you. \n",
"\n",
"Cancer is the #2 cause of death in Americasecond only to heart disease.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 89}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 89}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 5:\n",
"\n",
@@ -169,7 +169,7 @@
"We will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America. \n",
"\n",
"But to compete for the best jobs of the future, we also need to level the playing field with China and other competitors.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 29}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 29}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 6:\n",
"\n",
@@ -180,7 +180,7 @@
"Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n",
"\n",
"In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 2}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 2}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 7:\n",
"\n",
@@ -193,7 +193,7 @@
"Inflation is robbing them of the gains they might otherwise feel. \n",
"\n",
"I get it. Thats why my top priority is getting prices under control.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 35}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 35}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 8:\n",
"\n",
@@ -203,7 +203,7 @@
"\n",
"Invest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up \n",
"and the middle out, not from the top down.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 23}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 23}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 9:\n",
"\n",
@@ -212,7 +212,7 @@
"And Im taking robust action to make sure the pain of our sanctions is targeted at Russias economy. And I will use every tool at our disposal to protect American businesses and consumers. \n",
"\n",
"Tonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 14}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 14}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 10:\n",
"\n",
@@ -223,7 +223,7 @@
"Just last year, 55 Fortune 500 corporations earned $40 billion in profits and paid zero dollars in federal income tax. \n",
"\n",
"Thats simply not fair. Thats why Ive proposed a 15% minimum tax rate for corporations.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 46}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 46}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 11:\n",
"\n",
@@ -232,7 +232,7 @@
"For Joshua, and for the 200,000 other young people with Type 1 diabetes, lets cap the cost of insulin at $35 a month so everyone can afford it. \n",
"\n",
"Drug companies will still do very well. And while were at it let Medicare negotiate lower prices for prescription drugs, like the VA already does.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 41}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 41}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 12:\n",
"\n",
@@ -241,14 +241,14 @@
"Well create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \n",
"\n",
"And well do it all to withstand the devastating effects of the climate crisis and promote environmental justice.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 26}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 26}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 13:\n",
"\n",
"As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n",
"\n",
"While it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 79}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 79}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 14:\n",
"\n",
@@ -261,14 +261,14 @@
"When they came home, many of the worlds fittest and best trained warriors were never the same. \n",
"\n",
"Headaches. Numbness. Dizziness.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 85}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 85}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 15:\n",
"\n",
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
"\n",
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 74}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 74}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 16:\n",
"\n",
@@ -279,7 +279,7 @@
"I know what works: Investing in crime prevention and community police officers wholl walk the beat, wholl know the neighborhood, and who can restore trust and safety. \n",
"\n",
"So lets not abandon our streets. Or choose between safety and equal justice.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 67}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 67}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 17:\n",
"\n",
@@ -288,7 +288,7 @@
"4,000 projects have already been announced. \n",
"\n",
"And tonight, Im announcing that this year we will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 27}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 27}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 18:\n",
"\n",
@@ -302,7 +302,7 @@
"More support for patients and families. \n",
"\n",
"To get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 90}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 90}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 19:\n",
"\n",
@@ -315,7 +315,7 @@
"And so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \n",
"\n",
"I understand.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 18}\n",
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 18}\n",
"----------------------------------------------------------------------------------------------------\n",
"Document 20:\n",
"\n",
@@ -326,7 +326,7 @@
"Imagine what its like to look at your child who needs insulin and have no idea how youre going to pay for it. \n",
"\n",
"What it does to your dignity, your ability to look your child in the eye, to be the parent you expect to be.\n",
"Metadata: {'source': '../../modules/state_of_the_union.txt', 'id': 40}\n"
"Metadata: {'source': '../../how_to/state_of_the_union.txt', 'id': 40}\n"
]
}
],
@@ -337,7 +337,7 @@
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"documents = TextLoader(\n",
" \"../../modules/state_of_the_union.txt\",\n",
" \"../../how_to/state_of_the_union.txt\",\n",
").load()\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
"texts = text_splitter.split_documents(documents)\n",

View File

@@ -318,7 +318,7 @@
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from langchain_voyageai import VoyageAIEmbeddings\n",
"\n",
"documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
"texts = text_splitter.split_documents(documents)\n",
"retriever = FAISS.from_documents(\n",

View File

@@ -7,11 +7,12 @@
"source": [
"# Kuzu\n",
"\n",
">[Kùzu](https://kuzudb.com) is an in-process property graph database management system. \n",
">\n",
">This notebook shows how to use LLMs to provide a natural language interface to [Kùzu](https://kuzudb.com) database with `Cypher` graph query language.\n",
">\n",
">[Cypher](https://en.wikipedia.org/wiki/Cypher_(query_language)) is a declarative graph query language that allows for expressive and efficient data querying in a property graph."
">[Kùzu](https://kuzudb.com) is an embeddable property graph database management system built for query speed and scalability.\n",
"> \n",
"> Kùzu has a permissive (MIT) open source license and implements [Cypher](https://en.wikipedia.org/wiki/Cypher_(query_language)), a declarative graph query language that allows for expressive and efficient data querying in a property graph.\n",
"> It uses columnar storage and its query processor contains novel join algorithms that allow it to scale to very large graphs without sacrificing query performance.\n",
"> \n",
"> This notebook shows how to use LLMs to provide a natural language interface to [Kùzu](https://kuzudb.com) database with Cypher."
]
},
{
@@ -21,7 +22,8 @@
"source": [
"## Setting up\n",
"\n",
"Install the python package:\n",
"Kùzu is an embedded database (it runs in-process), so there are no servers to manage.\n",
"Simply install it via its Python package:\n",
"\n",
"```bash\n",
"pip install kuzu\n",
@@ -32,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -52,16 +54,16 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<kuzu.query_result.QueryResult at 0x1066ff410>"
"<kuzu.query_result.QueryResult at 0x103a72290>"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -84,16 +86,16 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<kuzu.query_result.QueryResult at 0x107016210>"
"<kuzu.query_result.QueryResult at 0x103a9e750>"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -132,7 +134,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -143,7 +145,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -152,11 +154,15 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"chain = KuzuQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)"
"chain = KuzuQAChain.from_llm(\n",
" llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-16k\"),\n",
" graph=graph,\n",
" verbose=True,\n",
")"
]
},
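The `graph` argument comes from setup cells that this diff elides. In outline (schema and seed data abbreviated, so treat this as a sketch of the prerequisites rather than the notebook's exact code):

```python
import kuzu
from langchain_community.graphs import KuzuGraph

db = kuzu.Database("test_db")  # in-process database directory
conn = kuzu.Connection(db)

# The notebook also creates Person and Movie node tables, an ActedIn relationship
# table, and inserts a handful of rows before wrapping the database.
conn.execute("CREATE NODE TABLE Movie (name STRING, PRIMARY KEY(name))")

graph = KuzuGraph(db)
```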
{
@@ -166,12 +172,13 @@
"source": [
"## Refresh graph schema information\n",
"\n",
"If the schema of database changes, you can refresh the schema information needed to generate Cypher statements."
"If the schema of database changes, you can refresh the schema information needed to generate Cypher statements.\n",
"You can also display the schema of the Kùzu graph as demonstrated below."
]
},
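Both operations are single calls on the graph object (a short sketch; the printed schema string is what the chain injects into its Cypher-generation prompt):

```python
graph.refresh_schema()
print(graph.get_schema)
```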
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -180,7 +187,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -205,78 +212,7 @@
"source": [
"## Querying the graph\n",
"\n",
"We can now use the `KuzuQAChain` to ask question of the graph"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Generated Cypher:\n",
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie {name: 'The Godfather: Part II'}) RETURN p.name\u001b[0m\n",
"Full Context:\n",
"\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}, {'p.name': 'Robert De Niro'}]\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'Al Pacino and Robert De Niro both played in The Godfather: Part II.'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"Who played in The Godfather: Part II?\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Generated Cypher:\n",
"\u001b[32;1m\u001b[1;3mMATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)\n",
"RETURN m.name\u001b[0m\n",
"Full Context:\n",
"\u001b[32;1m\u001b[1;3m[{'m.name': 'The Godfather: Part II'}]\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'Robert De Niro played in The Godfather: Part II.'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"Robert De Niro played in which movies?\")"
"We can now use the `KuzuQAChain` to ask questions of the graph."
]
},
{
@@ -290,12 +226,13 @@
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n",
"Generated Cypher:\n",
"\u001b[32;1m\u001b[1;3mMATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)\n",
"RETURN p.birthDate\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie)\n",
"WHERE m.name = 'The Godfather: Part II'\n",
"RETURN p.name\u001b[0m\n",
"Full Context:\n",
"\u001b[32;1m\u001b[1;3m[{'p.birthDate': '1943-08-17'}]\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}, {'p.name': 'Robert De Niro'}]\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@@ -303,7 +240,8 @@
{
"data": {
"text/plain": [
"'Robert De Niro was born on August 17, 1943.'"
"{'query': 'Who acted in The Godfather: Part II?',\n",
" 'result': 'Al Pacino, Robert De Niro acted in The Godfather: Part II.'}"
]
},
"execution_count": 11,
@@ -312,7 +250,7 @@
}
],
"source": [
"chain.run(\"Robert De Niro is born in which year?\")"
"chain.invoke(\"Who acted in The Godfather: Part II?\")"
]
},
{
@@ -326,13 +264,87 @@
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n",
"Generated Cypher:\n",
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie{name:'The Godfather: Part II'})\n",
"WITH p, m, p.birthDate AS birthDate\n",
"ORDER BY birthDate ASC\n",
"LIMIT 1\n",
"RETURN p.name\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie)\n",
"WHERE p.name = 'Robert De Niro'\n",
"RETURN m.name\u001b[0m\n",
"Full Context:\n",
"\u001b[32;1m\u001b[1;3m[{'m.name': 'The Godfather: Part II'}]\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'query': 'Robert De Niro played in which movies?',\n",
" 'result': 'Robert De Niro played in The Godfather: Part II.'}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke(\"Robert De Niro played in which movies?\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n",
"Generated Cypher:\n",
"\u001b[32;1m\u001b[1;3mMATCH (:Person)-[:ActedIn]->(:Movie {name: 'Godfather: Part II'})\n",
"RETURN count(*)\u001b[0m\n",
"Full Context:\n",
"\u001b[32;1m\u001b[1;3m[{'COUNT_STAR()': 0}]\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'query': 'How many actors played in the Godfather: Part II?',\n",
" 'result': \"I don't know the answer.\"}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke(\"How many actors played in the Godfather: Part II?\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n",
"Generated Cypher:\n",
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie {name: 'The Godfather: Part II'})\n",
"RETURN p.name\n",
"ORDER BY p.birthDate ASC\n",
"LIMIT 1\u001b[0m\n",
"Full Context:\n",
"\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}]\u001b[0m\n",
"\n",
@@ -342,16 +354,114 @@
{
"data": {
"text/plain": [
"'The oldest actor who played in The Godfather: Part II is Al Pacino.'"
"{'query': 'Who is the oldest actor who played in The Godfather: Part II?',\n",
" 'result': 'Al Pacino is the oldest actor who played in The Godfather: Part II.'}"
]
},
"execution_count": 12,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"Who is the oldest actor who played in The Godfather: Part II?\")"
"chain.invoke(\"Who is the oldest actor who played in The Godfather: Part II?\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use separate LLMs for Cypher and answer generation\n",
"\n",
"You can specify `cypher_llm` and `qa_llm` separately to use different LLMs for Cypher generation and answer generation."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/prrao/code/langchain/.venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 0.3.0. Use RunnableSequence, e.g., `prompt | llm` instead.\n",
" warn_deprecated(\n"
]
}
],
"source": [
"chain = KuzuQAChain.from_llm(\n",
" cypher_llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-16k\"),\n",
" qa_llm=ChatOpenAI(temperature=0, model=\"gpt-4\"),\n",
" graph=graph,\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/prrao/code/langchain/.venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
" warn_deprecated(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generated Cypher:\n",
"\u001b[32;1m\u001b[1;3mMATCH (:Person)-[:ActedIn]->(:Movie {name: 'The Godfather: Part II'})\n",
"RETURN count(*)\u001b[0m\n",
"Full Context:\n",
"\u001b[32;1m\u001b[1;3m[{'COUNT_STAR()': 2}]\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/prrao/code/langchain/.venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The method `Chain.__call__` was deprecated in langchain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
" warn_deprecated(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'query': 'How many actors played in The Godfather: Part II?',\n",
" 'result': 'Two actors played in The Godfather: Part II.'}"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke(\"How many actors played in The Godfather: Part II?\")"
]
}
],
@@ -371,7 +481,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.7"
}
},
"nbformat": 4,

View File

@@ -584,7 +584,7 @@
"id": "8edb9976",
"metadata": {},
"source": [
"To address this, we can adjust the initial Cypher prompt of the QA chain. This involves adding guidance to the LLM on how users can refer to specific platforms, such as PS5 in our case. We achieve this using the LangChain [PromptTemplate](/docs/modules/model_io/prompts/), creating a modified initial prompt. This modified prompt is then supplied as an argument to our refined Memgraph-LangChain instance."
"To address this, we can adjust the initial Cypher prompt of the QA chain. This involves adding guidance to the LLM on how users can refer to specific platforms, such as PS5 in our case. We achieve this using the LangChain [PromptTemplate](/docs/how_to#prompt-templates), creating a modified initial prompt. This modified prompt is then supplied as an argument to our refined Memgraph-LangChain instance."
]
},
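A hedged outline of what that looks like, assuming the chain on this page is a `GraphCypherQAChain` over a `MemgraphGraph` (the prompt text below is illustrative, not the notebook's exact wording):

```python
from langchain.chains import GraphCypherQAChain
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

CYPHER_GENERATION_TEMPLATE = """Task: Generate a Cypher statement to query the graph database.
Users may refer to "PlayStation 5" as "PS5"; both mean the platform named "PS5".
Schema:
{schema}
Question:
{question}
"""

cypher_prompt = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=graph,  # the MemgraphGraph instance created earlier in the notebook
    cypher_prompt=cypher_prompt,
    verbose=True,
)
```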
{

View File

@@ -70,7 +70,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open(\"../../../modules/state_of_the_union.txt\") as f:\n",
"with open(\"../../../how_to/state_of_the_union.txt\") as f:\n",
" all_text = f.read()"
]
},

View File

@@ -79,7 +79,7 @@
"source": [
"## Usage\n",
"\n",
"Cohere supports all [LLM](/docs/modules/model_io/llms/) functionality:"
"Cohere supports all [LLM](/docs/how_to#llms) functionality:"
]
},
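The usage cell that follows is not visible in this diff; a minimal sketch of a basic completion call (assuming `COHERE_API_KEY` is set in the environment):

```python
from langchain_community.llms import Cohere

llm = Cohere(max_tokens=256, temperature=0.75)
print(llm.invoke("Come up with a pet name for a golden retriever"))
```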
{
@@ -193,7 +193,7 @@
"id": "39198f7d-6fc8-4662-954a-37ad38c4bec4",
"metadata": {},
"source": [
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/expression_language)"
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/concepts#langchain-expression-language)"
]
},
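In practice that chaining is a single pipe expression; a sketch reusing the `llm` above (the prompt is illustrative):

```python
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate.from_template("Tell me a one-line joke about {topic}")
chain = prompt | llm
print(chain.invoke({"topic": "bears"}))
```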
{

Some files were not shown because too many files have changed in this diff.