core: default implementation for embed_documents

core, tests: more tolerant _aget_relevant_documents function (#28462 )
cli: release 0.0.35 (#28557 )
2026-02-08 18:19:21 +00:00 · 2024-12-05 17:17:02 -08:00 · 2024-12-06 00:49:30 +00:00 · 2024-12-05 16:40:52 -08:00 · 2024-12-06 00:32:47 +00:00 · 2024-12-05 23:57:54 +00:00
161 changed files with 7838 additions and 2765 deletions
--- a/.github/DISCUSSION_TEMPLATE/q-a.yml
+++ b/.github/DISCUSSION_TEMPLATE/q-a.yml
@@ -22,7 +22,7 @@ body:
        if there's another way to solve your problem:
        
        [LangChain documentation with the integrated search](https://python.langchain.com/docs/get_started/introduction),
-        [API Reference](https://api.python.langchain.com/en/stable/),
+        [API Reference](https://python.langchain.com/api_reference/),
        [GitHub search](https://github.com/langchain-ai/langchain),
        [LangChain Github Discussions](https://github.com/langchain-ai/langchain/discussions),
        [LangChain Github Issues](https://github.com/langchain-ai/langchain/issues?q=is%3Aissue),
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -16,7 +16,7 @@ body:
        if there's another way to solve your problem:
        
        [LangChain documentation with the integrated search](https://python.langchain.com/docs/get_started/introduction),
-        [API Reference](https://api.python.langchain.com/en/stable/),
+        [API Reference](https://python.langchain.com/api_reference/),
        [GitHub search](https://github.com/langchain-ai/langchain),
        [LangChain Github Discussions](https://github.com/langchain-ai/langchain/discussions),
        [LangChain Github Issues](https://github.com/langchain-ai/langchain/issues?q=is%3Aissue),
--- a/.github/ISSUE_TEMPLATE/documentation.yml
+++ b/.github/ISSUE_TEMPLATE/documentation.yml
@@ -21,7 +21,7 @@ body:
      place to ask your question:
      
      [LangChain documentation with the integrated search](https://python.langchain.com/docs/get_started/introduction),
-      [API Reference](https://api.python.langchain.com/en/stable/),
+      [API Reference](https://python.langchain.com/api_reference/),
      [GitHub search](https://github.com/langchain-ai/langchain),
      [LangChain Github Discussions](https://github.com/langchain-ai/langchain/discussions),
      [LangChain Github Issues](https://github.com/langchain-ai/langchain/issues?q=is%3Aissue),
--- a/.github/scripts/check_diff.py
+++ b/.github/scripts/check_diff.py
@@ -272,6 +272,9 @@ if __name__ == "__main__":
            # TODO: update to include all packages that rely on standard-tests (all partner packages)
            # note: won't run on external repo partners
            dirs_to_run["lint"].add("libs/standard-tests")
+            dirs_to_run["test"].add("libs/standard-tests")
+            dirs_to_run["lint"].add("libs/cli")
+            dirs_to_run["test"].add("libs/cli")
            dirs_to_run["test"].add("libs/partners/mistralai")
            dirs_to_run["test"].add("libs/partners/openai")
            dirs_to_run["test"].add("libs/partners/anthropic")
@@ -279,8 +282,9 @@ if __name__ == "__main__":
            dirs_to_run["test"].add("libs/partners/groq")

        elif file.startswith("libs/cli"):
-            # todo: add cli makefile
-            pass
+            dirs_to_run["lint"].add("libs/cli")
+            dirs_to_run["test"].add("libs/cli")
+            
        elif file.startswith("libs/partners"):
            partner_dir = file.split("/")[2]
            if os.path.isdir(f"libs/partners/{partner_dir}") and [
--- a/.github/workflows/_compile_integration_test.yml
+++ b/.github/workflows/_compile_integration_test.yml
@@ -13,7 +13,7 @@ on:
        description: "Python version to use"

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"

 jobs:
  build:
@@ -21,6 +21,7 @@ jobs:
      run:
        working-directory: ${{ inputs.working-directory }}
    runs-on: ubuntu-latest
+    timeout-minutes: 20
    name: "poetry run pytest -m compile tests/integration_tests #${{ inputs.python-version }}"
    steps:
      - uses: actions/checkout@v4
--- a/.github/workflows/_integration_test.yml
+++ b/.github/workflows/_integration_test.yml
@@ -12,7 +12,7 @@ on:
        description: "Python version to use"

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"

 jobs:
  build:
--- a/.github/workflows/_lint.yml
+++ b/.github/workflows/_lint.yml
@@ -13,7 +13,7 @@ on:
        description: "Python version to use"

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"
  WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}

  # This env var allows us to get inline annotations when ruff has complaints.
@@ -23,6 +23,7 @@ jobs:
  build:
    name: "make lint #${{ inputs.python-version }}"
    runs-on: ubuntu-latest
+    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4

--- a/.github/workflows/_release.yml
+++ b/.github/workflows/_release.yml
@@ -21,7 +21,7 @@ on:

 env:
  PYTHON_VERSION: "3.11"
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"

 jobs:
  build:
@@ -167,6 +167,7 @@ jobs:
      - release-notes
      - test-pypi-publish
    runs-on: ubuntu-latest
+    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4

@@ -191,7 +192,12 @@ jobs:
          poetry-version: ${{ env.POETRY_VERSION }}
          working-directory: ${{ inputs.working-directory }}

-      - name: Import published package
+      - uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: ${{ inputs.working-directory }}/dist/
+
+      - name: Import dist package
        shell: bash
        working-directory: ${{ inputs.working-directory }}
        env:
@@ -207,15 +213,7 @@ jobs:
        # - attempt install again after 5 seconds if it fails because there is
        #   sometimes a delay in availability on test pypi
        run: |
-          poetry run pip install \
-            --extra-index-url https://test.pypi.org/simple/ \
-            "$PKG_NAME==$VERSION" || \
-          ( \
-            sleep 15 && \
-            poetry run pip install \
-              --extra-index-url https://test.pypi.org/simple/ \
-              "$PKG_NAME==$VERSION" \
-          )
+          poetry run pip install dist/*.whl

          # Replace all dashes in the package name with underscores,
          # since that's how Python imports packages with dashes in the name.
@@ -224,10 +222,10 @@ jobs:
          poetry run python -c "import $IMPORT_NAME; print(dir($IMPORT_NAME))"

      - name: Import test dependencies
-        run: poetry install --with test
+        run: poetry install --with test --no-root
        working-directory: ${{ inputs.working-directory }}

-      # Overwrite the local version of the package with the test PyPI version.
+      # Overwrite the local version of the package with the built version
      - name: Import published package (again)
        working-directory: ${{ inputs.working-directory }}
        shell: bash
@@ -235,9 +233,7 @@ jobs:
          PKG_NAME: ${{ needs.build.outputs.pkg-name }}
          VERSION: ${{ needs.build.outputs.version }}
        run: |
-          poetry run pip install \
-            --extra-index-url https://test.pypi.org/simple/ \
-            "$PKG_NAME==$VERSION"
+          poetry run pip install dist/*.whl

      - name: Run unit tests
        run: make tests
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -13,7 +13,7 @@ on:
        description: "Python version to use"

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"

 jobs:
  build:
@@ -21,6 +21,7 @@ jobs:
      run:
        working-directory: ${{ inputs.working-directory }}
    runs-on: ubuntu-latest
+    timeout-minutes: 20
    name: "make test #${{ inputs.python-version }}"
    steps:
      - uses: actions/checkout@v4
--- a/.github/workflows/_test_doc_imports.yml
+++ b/.github/workflows/_test_doc_imports.yml
@@ -9,11 +9,12 @@ on:
        description: "Python version to use"

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"

 jobs:
  build:
    runs-on: ubuntu-latest
+    timeout-minutes: 20
    name: "check doc imports #${{ inputs.python-version }}"
    steps:
      - uses: actions/checkout@v4
--- a/.github/workflows/_test_pydantic.yml
+++ b/.github/workflows/_test_pydantic.yml
@@ -18,7 +18,7 @@ on:
        description: "Pydantic version to test."

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"

 jobs:
  build:
@@ -26,6 +26,7 @@ jobs:
      run:
        working-directory: ${{ inputs.working-directory }}
    runs-on: ubuntu-latest
+    timeout-minutes: 20
    name: "make test # pydantic: ~=${{ inputs.pydantic-version }}, python: ${{ inputs.python-version }}, "
    steps:
      - uses: actions/checkout@v4
--- a/.github/workflows/_test_release.yml
+++ b/.github/workflows/_test_release.yml
@@ -14,7 +14,7 @@ on:
        description: "Release from a non-master branch (danger!)"

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"
  PYTHON_VERSION: "3.10"

 jobs:
--- a/.github/workflows/api_doc_build.yml
+++ b/.github/workflows/api_doc_build.yml
@@ -5,7 +5,7 @@ on:
  schedule:
    - cron:  '0 13 * * *'
 env:
-  POETRY_VERSION: "1.8.1"
+  POETRY_VERSION: "1.8.4"
  PYTHON_VERSION: "3.11"

 jobs:
--- a/.github/workflows/check_diffs.yml
+++ b/.github/workflows/check_diffs.yml
@@ -17,7 +17,7 @@ concurrency:
  cancel-in-progress: true

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"

 jobs:
  build:
@@ -119,6 +119,7 @@ jobs:
        job-configs: ${{ fromJson(needs.build.outputs.extended-tests) }}
      fail-fast: false
    runs-on: ubuntu-latest
+    timeout-minutes: 20
    defaults:
      run:
        working-directory: ${{ matrix.job-configs.working-directory }}
--- a/.github/workflows/run_notebooks.yml
+++ b/.github/workflows/run_notebooks.yml
@@ -15,7 +15,7 @@ on:
    - cron: '0 13 * * *'

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"

 jobs:
  build:
--- a/.github/workflows/scheduled_test.yml
+++ b/.github/workflows/scheduled_test.yml
@@ -2,32 +2,60 @@ name: Scheduled tests

 on:
  workflow_dispatch:  # Allows to trigger the workflow manually in GitHub UI
+    inputs:
+      working-directory-force:
+        type: string
+        description: "From which folder this pipeline executes - defaults to all in matrix - example value: libs/partners/anthropic"
+      python-version-force:
+        type: string
+        description: "Python version to use - defaults to 3.9 and 3.11 in matrix - example value: 3.9"
  schedule:
    - cron:  '0 13 * * *'

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.4"
+  DEFAULT_LIBS: '["libs/partners/openai", "libs/partners/anthropic", "libs/partners/fireworks", "libs/partners/groq", "libs/partners/mistralai", "libs/partners/google-vertexai", "libs/partners/google-genai", "libs/partners/aws"]'

 jobs:
+  compute-matrix:
+    if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'
+    runs-on: ubuntu-latest
+    name: Compute matrix
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Set matrix
+        id: set-matrix
+        env:
+          DEFAULT_LIBS: ${{ env.DEFAULT_LIBS }}
+          WORKING_DIRECTORY_FORCE: ${{ github.event.inputs.working-directory-force || '' }}
+          PYTHON_VERSION_FORCE: ${{ github.event.inputs.python-version-force || '' }}
+        run: |
+          # echo "matrix=..." where matrix is a json formatted str with keys python-version and working-directory
+          # python-version should default to 3.9 and 3.11, but is overridden to [PYTHON_VERSION_FORCE] if set
+          # working-directory should default to DEFAULT_LIBS, but is overridden to [WORKING_DIRECTORY_FORCE] if set
+          python_version='["3.9", "3.11"]'
+          working_directory="$DEFAULT_LIBS"
+          if [ -n "$PYTHON_VERSION_FORCE" ]; then
+            python_version="[\"$PYTHON_VERSION_FORCE\"]"
+          fi
+          if [ -n "$WORKING_DIRECTORY_FORCE" ]; then
+            working_directory="[\"$WORKING_DIRECTORY_FORCE\"]"
+          fi
+          matrix="{\"python-version\": $python_version, \"working-directory\": $working_directory}"
+          echo $matrix
+          echo "matrix=$matrix" >> $GITHUB_OUTPUT
  build:
    if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'
    name: Python ${{ matrix.python-version }} - ${{ matrix.working-directory }}
    runs-on: ubuntu-latest
+    needs: [compute-matrix]
+    timeout-minutes: 20
    strategy:
      fail-fast: false
      matrix:
-        python-version:
-          - "3.9"
-          - "3.11"
-        working-directory:
-          - "libs/partners/openai"
-          - "libs/partners/anthropic"
-          - "libs/partners/fireworks"
-          - "libs/partners/groq"
-          - "libs/partners/mistralai"
-          - "libs/partners/google-vertexai"
-          - "libs/partners/google-genai"
-          - "libs/partners/aws"
+        python-version: ${{ fromJSON(needs.compute-matrix.outputs.matrix).python-version }}
+        working-directory: ${{ fromJSON(needs.compute-matrix.outputs.matrix).working-directory }}

    steps:
      - uses: actions/checkout@v4
--- a/Makefile
+++ b/Makefile
@@ -69,7 +69,11 @@ lint lint_package lint_tests:
 	poetry run ruff check docs cookbook
 	poetry run ruff format docs cookbook cookbook --diff
 	poetry run ruff check --select I docs cookbook
-	git grep 'from langchain import' docs/docs cookbook | grep -vE 'from langchain import (hub)' && exit 1 || exit 0
+	git --no-pager grep 'from langchain import' docs cookbook | grep -vE 'from langchain import (hub)' && echo "Error: no importing langchain from root in docs, except for hub" && exit 1 || exit 0
+	
+	git --no-pager grep 'api.python.langchain.com' -- docs/docs ':!docs/docs/additional_resources/arxiv_references.mdx' ':!docs/docs/integrations/document_loaders/sitemap.ipynb' || exit 0 && \
+	echo "Error: you should link python.langchain.com/api_reference, not api.python.langchain.com in the docs" && \
+	exit 1

 ## format: Format the project files.
 format format_diff:
--- a/README.md
+++ b/README.md
@@ -123,7 +123,7 @@ Please see [here](https://python.langchain.com) for full documentation, which in
 - [Tutorials](https://python.langchain.com/docs/tutorials/): If you're looking to build something specific or are more of a hands-on learner, check out our tutorials. This is the best place to get started.
 - [How-to guides](https://python.langchain.com/docs/how_to/): Answers to “How do I….?” type questions. These guides are goal-oriented and concrete; they're meant to help you complete a specific task.
 - [Conceptual guide](https://python.langchain.com/docs/concepts/): Conceptual explanations of the key parts of the framework.
- [API Reference](https://api.python.langchain.com): Thorough documentation of every class and method.
+- [API Reference](https://python.langchain.com/api_reference/): Thorough documentation of every class and method.

 ## 🌐 Ecosystem

--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,5 +1,30 @@
 # Security Policy

+LangChain has a large ecosystem of integrations with various external resources like local and remote file systems, APIs and databases. These integrations allow developers to create versatile applications that combine the power of LLMs with the ability to access, interact with and manipulate external resources.
+
+## Best practices
+
+When building such applications, developers should remember to follow good security practices:
+
+* [**Limit Permissions**](https://en.wikipedia.org/wiki/Principle_of_least_privilege): Scope permissions specifically to the application's need. Granting broad or excessive permissions can introduce significant security vulnerabilities. To avoid such vulnerabilities, consider using read-only credentials, disallowing access to sensitive resources, using sandboxing techniques (such as running inside a container), specifying proxy configurations to control external requests, etc. as appropriate for your application.
+* **Anticipate Potential Misuse**: Just as humans can err, so can Large Language Models (LLMs). Always assume that any system access or credentials may be used in any way allowed by the permissions they are assigned. For example, if a pair of database credentials allows deleting data, it’s safest to assume that any LLM able to use those credentials may in fact delete data.
+* [**Defense in Depth**](https://en.wikipedia.org/wiki/Defense_in_depth_(computing)): No security technique is perfect. Fine-tuning and good chain design can reduce, but not eliminate, the odds that a Large Language Model (LLM) may make a mistake. It’s best to combine multiple layered security approaches rather than relying on any single layer of defense to ensure security. For example: use both read-only permissions and sandboxing to ensure that LLMs are only able to access data that is explicitly meant for them to use.
+
+Risks of not doing so include, but are not limited to:
+* Data corruption or loss.
+* Unauthorized access to confidential information.
+* Compromised performance or availability of critical resources.
+
+Example scenarios with mitigation strategies:
+
+* A user may ask an agent with access to the file system to delete files that should not be deleted or read the content of files that contain sensitive information. To mitigate, limit the agent to only use a specific directory and only allow it to read or write files that are safe to read or write. Consider further sandboxing the agent by running it in a container.
+* A user may ask an agent with write access to an external API to write malicious data to the API, or delete data from that API. To mitigate, give the agent read-only API keys, or limit it to only use endpoints that are already resistant to such misuse.
+* A user may ask an agent with access to a database to drop a table or mutate the schema. To mitigate, scope the credentials to only the tables that the agent needs to access and consider issuing READ-ONLY credentials.
+
+If you're building applications that access external resources like file systems, APIs
+or databases, consider speaking with your company's security team to determine how to best
+design and secure your applications.
+
 ## Reporting OSS Vulnerabilities

 LangChain is partnered with [huntr by Protect AI](https://huntr.com/) to provide 
@@ -14,7 +39,7 @@ Before reporting a vulnerability, please review:

 1) In-Scope Targets and Out-of-Scope Targets below.
 2) The [langchain-ai/langchain](https://python.langchain.com/docs/contributing/repo_structure) monorepo structure.
-3) LangChain [security guidelines](https://python.langchain.com/docs/security) to
+3) The [Best practices](#best-practices) above to
   understand what we consider to be a security vulnerability vs. developer
   responsibility.

@@ -33,13 +58,13 @@ The following packages and repositories are eligible for bug bounties:
 All out of scope targets defined by huntr as well as:

 - **langchain-experimental**: This repository is for experimental code and is not
-  eligible for bug bounties, bug reports to it will be marked as interesting or waste of
+  eligible for bug bounties (see [package warning](https://pypi.org/project/langchain-experimental/)); bug reports to it will be marked as interesting or waste of
  time and published with no bounty attached.
 - **tools**: Tools in either langchain or langchain-community are not eligible for bug
  bounties. This includes the following directories
-  - langchain/tools
-  - langchain-community/tools
-  - Please review our [security guidelines](https://python.langchain.com/docs/security)
+  - libs/langchain/langchain/tools
+  - libs/community/langchain_community/tools
+  - Please review the [best practices](#best-practices)
    for more details, but generally tools interact with the real world. Developers are
    expected to understand the security implications of their code and are responsible
    for the security of their tools.
@@ -47,7 +72,7 @@ All out of scope targets defined by huntr as well as:
  case basis, but likely will not be eligible for a bounty as the code is already
  documented with guidelines for developers that should be followed for making their
  application secure.
- Any LangSmith related repositories or APIs see below.
+- Any LangSmith related repositories or APIs (see [Reporting LangSmith Vulnerabilities](#reporting-langsmith-vulnerabilities)).

 ## Reporting LangSmith Vulnerabilities

--- a/docs/Makefile
+++ b/docs/Makefile
@@ -47,6 +47,7 @@ generate-files:
 	$(PYTHON) scripts/partner_pkg_table.py $(INTERMEDIATE_DIR)

 	curl https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md | sed 's/<=/\&lt;=/g' > $(INTERMEDIATE_DIR)/langserve.md
+	cp ../SECURITY.md $(INTERMEDIATE_DIR)/security.md
 	$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langserve.md https://github.com/langchain-ai/langserve/tree/main/

 copy-infra:
--- a/docs/api_reference/_static/css/custom.css
+++ b/docs/api_reference/_static/css/custom.css
@@ -80,6 +80,8 @@
 html {
    --pst-font-family-base: 'Inter';
    --pst-font-family-heading: 'Inter Tight', sans-serif;
+
+    --pst-icon-versionmodified-deprecated: var(--pst-icon-exclamation-triangle);
 }

 /*******************************************************************************
@@ -92,7 +94,7 @@ html {
 * https://sass-lang.com/documentation/interpolation
 */
 /* Defaults to light mode if data-theme is not set */
-html:not([data-theme]) {
+html:not([data-theme]), html[data-theme=light] {
  --pst-color-primary: #287977;
  --pst-color-primary-bg: #80D6D3;
  --pst-color-secondary: #6F3AED;
@@ -122,58 +124,8 @@ html:not([data-theme]) {
  --pst-color-on-background: #F4F9F8;
  --pst-color-surface: #F4F9F8;
  --pst-color-on-surface: #222832;
-}
-html:not([data-theme]) {
-  --pst-color-link: var(--pst-color-primary);
-  --pst-color-link-hover: var(--pst-color-secondary);
-}
-html:not([data-theme]) .only-dark,
-html:not([data-theme]) .only-dark ~ figcaption {
-  display: none !important;
-}
-
-/* NOTE: @each {...} is like a for-loop
- * https://sass-lang.com/documentation/at-rules/control/each
- */
-html[data-theme=light] {
-  --pst-color-primary: #287977;
-  --pst-color-primary-bg: #80D6D3;
-  --pst-color-secondary: #6F3AED;
-  --pst-color-secondary-bg: #DAD6FE;
-  --pst-color-accent: #c132af;
-  --pst-color-accent-bg: #f8dff5;
-  --pst-color-info: #276be9;
-  --pst-color-info-bg: #dce7fc;
-  --pst-color-warning: #f66a0a;
-  --pst-color-warning-bg: #f8e3d0;
-  --pst-color-success: #00843f;
-  --pst-color-success-bg: #d6ece1;
-  --pst-color-attention: var(--pst-color-warning);
-  --pst-color-attention-bg: var(--pst-color-warning-bg);
-  --pst-color-danger: #d72d47;
-  --pst-color-danger-bg: #f9e1e4;
-  --pst-color-text-base: #222832;
-  --pst-color-text-muted: #48566b;
-  --pst-color-heading-color: #ffffff;
-  --pst-color-shadow: rgba(0, 0, 0, 0.1);
-  --pst-color-border: #d1d5da;
-  --pst-color-border-muted: rgba(23, 23, 26, 0.2);
-  --pst-color-inline-code: #912583;
-  --pst-color-inline-code-links: #246161;
-  --pst-color-target: #f3cf95;
-  --pst-color-background: #ffffff;
-  --pst-color-on-background: #F4F9F8;
-  --pst-color-surface: #F4F9F8;
-  --pst-color-on-surface: #222832;
-  color-scheme: light;
-}
-html[data-theme=light] {
-  --pst-color-link: var(--pst-color-primary);
-  --pst-color-link-hover: var(--pst-color-secondary);
-}
-html[data-theme=light] .only-dark,
-html[data-theme=light] .only-dark ~ figcaption {
-  display: none !important;
+  --pst-color-deprecated: #f47d2e;
+  --pst-color-deprecated-bg: #fff3e8;
 }

 html[data-theme=dark] {
@@ -206,6 +158,8 @@ html[data-theme=dark] {
  --pst-color-on-background: #222832;
  --pst-color-surface: #29313d;
  --pst-color-on-surface: #f3f4f5;
+  --pst-color-deprecated: #b46f3e;
+  --pst-color-deprecated-bg: #341906;
  /* Adjust images in dark mode (unless they have class .only-dark or
   * .dark-light, in which case assume they're already optimized for dark
   * mode).
@@ -216,6 +170,30 @@ html[data-theme=dark] {
  */
  color-scheme: dark;
 }
+
+html:not([data-theme]) {
+  --pst-color-link: var(--pst-color-primary);
+  --pst-color-link-hover: var(--pst-color-secondary);
+}
+html:not([data-theme]) .only-dark,
+html:not([data-theme]) .only-dark ~ figcaption {
+  display: none !important;
+}
+
+/* NOTE: @each {...} is like a for-loop
+ * https://sass-lang.com/documentation/at-rules/control/each
+ */
+html[data-theme=light] {
+  color-scheme: light;
+}
+html[data-theme=light] {
+  --pst-color-link: var(--pst-color-primary);
+  --pst-color-link-hover: var(--pst-color-secondary);
+}
+html[data-theme=light] .only-dark,
+html[data-theme=light] .only-dark ~ figcaption {
+  display: none !important;
+}
 html[data-theme=dark] {
  --pst-color-link: var(--pst-color-primary);
  --pst-color-link-hover: var(--pst-color-secondary);
@@ -389,6 +367,13 @@ html[data-theme=dark] .MathJax_SVG * {
 div.deprecated {
  margin-top: 0.5em;
  margin-bottom: 2em;
+
+  background-color: var(--pst-color-deprecated-bg);
+  border-color: var(--pst-color-deprecated);
+}
+
+span.versionmodified.deprecated:before {
+  color: var(--pst-color-deprecated);
 }

 .admonition-beta.admonition, div.admonition-beta.admonition {
@@ -408,4 +393,4 @@ dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.
 p {
  font-size: 0.9rem;
  margin-bottom: 0.5rem;
-}
+}
--- a/docs/docs/additional_resources/arxiv_references.mdx
+++ b/docs/docs/additional_resources/arxiv_references.mdx
@@ -28,19 +28,19 @@ From the opposite direction, scientists use `LangChain` in research and referenc
 | `2307.09288v2` [Llama 2: Open Foundation and Fine-Tuned Chat Models](http://arxiv.org/abs/2307.09288v2) | Hugo Touvron, Louis Martin, Kevin Stone,  et al. | 2023&#8209;07&#8209;18 | `Cookbook:` [Semi Structured Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
 | `2307.03172v3` [Lost in the Middle: How Language Models Use Long Contexts](http://arxiv.org/abs/2307.03172v3) | Nelson F. Liu, Kevin Lin, John Hewitt,  et al. | 2023&#8209;07&#8209;06 | `Docs:` [docs/how_to/long_context_reorder](https://python.langchain.com/docs/how_to/long_context_reorder)
 | `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He,  et al. | 2023&#8209;05&#8209;23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [Rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
-| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023&#8209;05&#8209;15 | `API:` [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot), `Cookbook:` [Tree Of Thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
+| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023&#8209;05&#8209;15 | `API:` [langchain_experimental.tot](https://python.langchain.com/api_reference/experimental/tot.html), `Cookbook:` [Tree Of Thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
 | `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan,  et al. | 2023&#8209;05&#8209;06 | `Cookbook:` [Plan And Execute Agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
-| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep,  et al. | 2023&#8209;05&#8209;03 | `Docs:` [docs/how_to/contextual_compression](https://python.langchain.com/docs/how_to/contextual_compression), `API:` [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
+| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep,  et al. | 2023&#8209;05&#8209;03 | `Docs:` [docs/how_to/contextual_compression](https://python.langchain.com/docs/how_to/contextual_compression), `API:` [langchain...LLMListwiseRerank](https://python.langchain.com/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#)
 | `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu,  et al. | 2023&#8209;04&#8209;17 | `Cookbook:` [Semi Structured Multi Modal Rag Llama2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb), [Semi Structured And Multi Modal Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb)
 | `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai,  et al. | 2023&#8209;04&#8209;07 | `Cookbook:` [Generative Agents Interactive Simulacra Of Human Behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb), [Multiagent Bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb)
 | `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani,  et al. | 2023&#8209;03&#8209;31 | `Cookbook:` [Camel Role Playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
-| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan,  et al. | 2023&#8209;03&#8209;30 | `API:` [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [Hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
+| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan,  et al. | 2023&#8209;03&#8209;30 | `API:` [langchain_experimental.autonomous_agents](https://python.langchain.com/api_reference/experimental/autonomous_agents.html), `Cookbook:` [Hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
 | `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen,  et al. | 2023&#8209;01&#8209;24 | `API:` [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
 | `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin,  et al. | 2022&#8209;12&#8209;20 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts), `API:` [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [Hypothetical Document Embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
 | `2212.08073v1` [Constitutional AI: Harmlessness from AI Feedback](http://arxiv.org/abs/2212.08073v1) | Yuntao Bai, Saurav Kadavath, Sandipan Kundu,  et al. | 2022&#8209;12&#8209;15 | `Docs:` [docs/versions/migrating_chains/constitutional_chain](https://python.langchain.com/docs/versions/migrating_chains/constitutional_chain)
-| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande,  et al. | 2022&#8209;12&#8209;12 | `API:` [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
+| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande,  et al. | 2022&#8209;12&#8209;12 | `API:` [langchain_experimental.fallacy_removal](https://python.langchain.com/api_reference/experimental/fallacy_removal.html)
 | `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz,  et al. | 2022&#8209;11&#8209;25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
-| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou,  et al. | 2022&#8209;11&#8209;18 | `API:` [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [Program Aided Language Model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
+| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou,  et al. | 2022&#8209;11&#8209;18 | `API:` [langchain_experimental.pal_chain](https://python.langchain.com/api_reference/experimental/pal_chain.html), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [Program Aided Language Model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
 | `2210.11934v2` [An Analysis of Fusion Functions for Hybrid Retrieval](http://arxiv.org/abs/2210.11934v2) | Sebastian Bruch, Siyu Gai, Amir Ingber | 2022&#8209;10&#8209;21 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
 | `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu,  et al. | 2022&#8209;10&#8209;06 | `Docs:` [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/concepts](https://python.langchain.com/docs/concepts), `API:` [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
 | `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan,  et al. | 2022&#8209;09&#8209;22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
@@ -49,7 +49,7 @@ From the opposite direction, scientists use `LangChain` in research and referenc
 | `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022&#8209;03&#8209;15 | `Docs:` [docs/tutorials/sql_qa](https://python.langchain.com/docs/tutorials/sql_qa), `API:` [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
 | `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher,  et al. | 2022&#8209;02&#8209;01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
 | `2112.01488v3` [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](http://arxiv.org/abs/2112.01488v3) | Keshav Santhanam, Omar Khattab, Jon Saad-Falcon,  et al. | 2021&#8209;12&#8209;02 | `Docs:` [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/docs/integrations/providers/dspy)
-| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy,  et al. | 2021&#8209;02&#8209;26 | `API:` [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
+| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy,  et al. | 2021&#8209;02&#8209;26 | `API:` [langchain_experimental.open_clip](https://python.langchain.com/api_reference/experimental/open_clip.html)
 | `2005.14165v4` [Language Models are Few-Shot Learners](http://arxiv.org/abs/2005.14165v4) | Tom B. Brown, Benjamin Mann, Nick Ryder,  et al. | 2020&#8209;05&#8209;28 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
 | `2005.11401v4` [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](http://arxiv.org/abs/2005.11401v4) | Patrick Lewis, Ethan Perez, Aleksandra Piktus,  et al. | 2020&#8209;05&#8209;22 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
 | `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney,  et al. | 2019&#8209;09&#8209;11 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
@@ -433,7 +433,7 @@ for retrieval-augmented LLM.
 - **arXiv id:** [2305.08291v1](http://arxiv.org/abs/2305.08291v1)  **Published Date:** 2023-05-15
 - **LangChain:**

-   - **API Reference:** [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot)
+   - **API Reference:** [langchain_experimental.tot](https://python.langchain.com/api_reference/experimental/tot.html)
   - **Cookbook:** [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)

 **Abstract:** In this paper, we introduce the Tree-of-Thought (ToT) framework, a novel
@@ -490,7 +490,7 @@ https://github.com/AGI-Edgerunners/Plan-and-Solve-Prompting.
 - **LangChain:**

   - **Documentation:** [docs/how_to/contextual_compression](https://python.langchain.com/docs/how_to/contextual_compression)
-   - **API Reference:** [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
+   - **API Reference:** [langchain...LLMListwiseRerank](https://python.langchain.com/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html)

 **Abstract:** Supervised ranking methods based on bi-encoder or cross-encoder architectures
 have shown success in multi-stage text ranking tasks, but they require large
@@ -597,7 +597,7 @@ agents and beyond: https://github.com/camel-ai/camel.
 - **arXiv id:** [2303.17580v4](http://arxiv.org/abs/2303.17580v4)  **Published Date:** 2023-03-30
 - **LangChain:**

-   - **API Reference:** [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents)
+   - **API Reference:** [langchain_experimental.autonomous_agents](https://python.langchain.com/api_reference/experimental/autonomous_agents.html)
   - **Cookbook:** [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)

 **Abstract:** Solving complicated AI tasks with different domains and modalities is a key
@@ -704,7 +704,7 @@ labels.
 - **arXiv id:** [2212.07425v3](http://arxiv.org/abs/2212.07425v3)  **Published Date:** 2022-12-12
 - **LangChain:**

-   - **API Reference:** [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
+   - **API Reference:** [langchain_experimental.fallacy_removal](https://python.langchain.com/api_reference/experimental/fallacy_removal.html)

 **Abstract:** The spread of misinformation, propaganda, and flawed argumentation has been
 amplified in the Internet era. Given the volume of data and the subtlety of
@@ -759,7 +759,7 @@ performance across three real-world tasks on multiple LLMs.
 - **arXiv id:** [2211.10435v2](http://arxiv.org/abs/2211.10435v2)  **Published Date:** 2022-11-18
 - **LangChain:**

-   - **API Reference:** [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
+   - **API Reference:** [langchain_experimental.pal_chain](https://python.langchain.com/api_reference/experimental/pal_chain.html), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
   - **Cookbook:** [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)

 **Abstract:** Large language models (LLMs) have recently demonstrated an impressive ability
@@ -992,7 +992,7 @@ footprint of late interaction models by 6--10$\times$.
 - **arXiv id:** [2103.00020v1](http://arxiv.org/abs/2103.00020v1)  **Published Date:** 2021-02-26
 - **LangChain:**

-   - **API Reference:** [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
+   - **API Reference:** [langchain_experimental.open_clip](https://python.langchain.com/api_reference/experimental/open_clip.html)

 **Abstract:** State-of-the-art computer vision systems are trained to predict a fixed set
 of predetermined object categories. This restricted form of supervision limits
--- a/docs/docs/concepts/embedding_models.mdx
+++ b/docs/docs/concepts/embedding_models.mdx
@@ -3,7 +3,7 @@

 :::info[Prerequisites]

-* [Documents](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html)
+* [Documents](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html)

 :::

--- a/docs/docs/concepts/retrieval.mdx
+++ b/docs/docs/concepts/retrieval.mdx
@@ -221,7 +221,7 @@ They are particularly useful for storing and querying complex relationships betw
 LangChain provides a unified interface for interacting with various retrieval systems through the [retriever](/docs/concepts/retrievers/) concept. The interface is straightforward:

 1. Input: A query (string)
-2. Output: A list of documents (standardized LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects)
+2. Output: A list of documents (standardized LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) objects)

 You can create a retriever using any of the retrieval systems mentioned earlier. The query analysis techniques we discussed are particularly useful here, as they enable natural language interfaces for databases that typically require structured query languages.
 For example, you can build a retriever for a SQL database using text-to-SQL conversion. This allows a natural language query (string) to be transformed into a SQL query behind the scenes.
--- a/docs/docs/concepts/retrievers.mdx
+++ b/docs/docs/concepts/retrievers.mdx
@@ -18,7 +18,7 @@ Because of their importance and variability, LangChain provides a uniform interf
 The LangChain [retriever](/docs/concepts/retrievers/) interface is straightforward:

 1. Input: A query (string)
-2. Output: A list of documents (standardized LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects)
+2. Output: A list of documents (standardized LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) objects)

 ## Key concept

@@ -29,7 +29,7 @@ All retrievers implement a simple interface for retrieving documents using natur
 ## Interface 

 The only requirement for a retriever is the ability to accepts a query and return documents. 
-In particular, [LangChain's retriever class](https://api.python.langchain.com/en/latest/retrievers/langchain_core.retrievers.BaseRetriever.html) only requires that the `_get_relevant_documents` method is implemented, which takes a `query: str` and returns a list of [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects that are most relevant to the query.
+In particular, [LangChain's retriever class](https://python.langchain.com/api_reference/core/retrievers/langchain_core.retrievers.BaseRetriever.html) only requires that the `_get_relevant_documents` method is implemented, which takes a `query: str` and returns a list of [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) objects that are most relevant to the query.
 The underlying logic used to get relevant documents is specified by the retriever and can be whatever is most useful for the application.

 A LangChain retriever is a [runnable](/docs/how_to/lcel_cheatsheet/), which is a standard interface is for LangChain components. 
@@ -39,7 +39,7 @@ This means that it has a few common methods, including `invoke`, that are used t
 docs = retriever.invoke(query)
 ```

-Retrievers return a list of [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects, which have two attributes:
+Retrievers return a list of [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) objects, which have two attributes:

 * `page_content`: The content of this document. Currently is a string.
 * `metadata`: Arbitrary metadata associated with this document (e.g., document id, file name, source, etc). 
--- a/docs/docs/concepts/runnables.mdx
+++ b/docs/docs/concepts/runnables.mdx
@@ -125,7 +125,7 @@ Please see the [Configurable Runnables](#configurable-runnables) section for mor

 LangChain will automatically try to infer the input and output types of a Runnable based on available information.

-Currently, this inference does not work well for more complex Runnables that are built using [LCEL](/docs/concepts/lcel) composition, and the inferred input and / or output types may be incorrect. In these cases, we recommend that users override the inferred input and output types using the `with_types` method ([API Reference](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_types
+Currently, this inference does not work well for more complex Runnables that are built using [LCEL](/docs/concepts/lcel) composition, and the inferred input and / or output types may be incorrect. In these cases, we recommend that users override the inferred input and output types using the `with_types` method ([API Reference](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_types
 ).

 ## RunnableConfig
--- a/docs/docs/concepts/vectorstores.mdx
+++ b/docs/docs/concepts/vectorstores.mdx
@@ -59,7 +59,7 @@ vector_store = InMemoryVectorStore(embedding=SomeEmbeddingModel())

 To add documents, use the `add_documents` method.

-This API works with a list of [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects.
+This API works with a list of [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) objects.
 `Document` objects all have `page_content` and `metadata` attributes, making them a universal way to store unstructured text and associated metadata.

 ```python
@@ -126,7 +126,7 @@ to the documentation of the specific vectorstore you are using to see what simil
 Given a similarity metric to measure the distance between the embedded query and any embedded document, we need an algorithm to efficiently search over *all* the embedded documents to find the most similar ones.
 There are various ways to do this. As an example, many vectorstores implement [HNSW (Hierarchical Navigable Small World)](https://www.pinecone.io/learn/series/faiss/hnsw/), a graph-based index structure that allows for efficient similarity search.
 Regardless of the search algorithm used under the hood, the LangChain vectorstore interface has a `similarity_search` method for all integrations. 
-This will take the search query, create an embedding, find similar documents, and return them as a list of [Documents](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html).
+This will take the search query, create an embedding, find similar documents, and return them as a list of [Documents](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html).

 ```python
 query = "my query"
--- a/docs/docs/contributing/how_to/integrations/index.mdx
+++ b/docs/docs/contributing/how_to/integrations/index.mdx
@@ -4,14 +4,16 @@ pagination_next: contributing/how_to/integrations/package

 # Contribute Integrations

-LangChain integrations are packages that provide access to language models, vector stores, and other components that can be used in LangChain.
+Integrations are a core component of LangChain.
+LangChain provides standard interfaces for several different components (language models, vector stores, etc.) that are crucial when building LLM applications.

-This guide will walk you through how to contribute new integrations to LangChain, by
-publishing an integration package to PyPi, and adding documentation for it
-to the LangChain Monorepo.

-These instructions will evolve over the next few months as we improve our integration
-processes.
+## Why contribute an integration to LangChain?
+
+- **Discoverability:** LangChain is the most used framework for building LLM applications, with over 20 million monthly downloads. LangChain integrations are discoverable by a large community of GenAI builders.
+- **Interoperability:** LangChain components expose a standard interface, allowing developers to easily swap them for each other. If you implement a LangChain integration, any developer using a different component will easily be able to swap yours in.
+- **Best Practices:** Through their standard interface, LangChain components encourage and facilitate best practices (streaming, async, etc.).
+

 ## Components to Integrate

@@ -22,8 +24,7 @@ supported in LangChain

 :::

-While any component can be integrated into LangChain, at this time we are only accepting
-new integrations in the docs of the following kinds:
+While any component can be integrated into LangChain, there are specific types of integrations we encourage more:

 <table>
  <tr>
@@ -60,18 +61,30 @@ new integrations in the docs of the following kinds:

 ## How to contribute an integration

-The only step necessary to "be" a LangChain integration is to add documentation
-that will render on this site (https://python.langchain.com/).
+In order to contribute an integration, you should follow these steps:

-As a prerequisite to adding your integration to our documentation, you must:
-
-1. Confirm that your integration is in the [list of components](#components-to-integrate) we are currently accepting.
-2. [Implement your package](./package.mdx) and publish it to a public github repository.
+1. Confirm that your integration is in the [list of components](#components-to-integrate) we are currently encouraging.
+2. [Implement your package](/docs/contributing/how_to/integrations/package/) and publish it to a public GitHub repository.
 3. [Implement the standard tests](./standard_tests) for your integration and successfully run them.
 4. [Publish your integration](./publish.mdx) by publishing the package to PyPi and add docs in the `docs/docs/integrations` directory of the LangChain monorepo.
+5. [Optional] Open and merge a PR to add documentation for your integration to the official LangChain docs.
+6. [Optional] Engage with the LangChain team for joint co-marketing ([see below](#co-marketing)).

-Once you have completed these steps, you can submit a PR to the LangChain monorepo to add your integration to the documentation.
+## Co-Marketing
+
+With over 20 million monthly downloads, LangChain has a large audience of developers building LLM applications.
+Besides just adding integrations, we also like to show them examples of cool tools or APIs they can use.
+
+While traditionally called "co-marketing", we like to think of this more as "co-education".
+For that reason, while we are happy to highlight your integration through our social media channels, we prefer to highlight examples that also serve some educational purpose.
+Our main social media channels are Twitter and LinkedIn.
+
+Here are some heuristics for types of content we are excited to promote:
+
+- **Integration announcement:** If you announce the integration with a link to the LangChain documentation page, we are happy to re-tweet/re-share on Twitter/LinkedIn.
+- **Educational content:** We highlight good educational content on the weekends - if you write a good blog or make a good YouTube video, we are happy to share there! Note that we prefer content that is NOT framed as "here's how to use integration XYZ", but rather "here's how to do ABC", as we find that is more educational and helpful for developers.
+- **End-to-end applications:** End-to-end applications are great resources for developers looking to build. We prefer to highlight applications that are more complex/agentic in nature, and that use [LangGraph](https://github.com/langchain-ai/langgraph) as the orchestration framework. We get particularly excited about anything involving long-term memory, human-in-the-loop interaction patterns, or multi-agent architectures.
+- **Research:** We love highlighting novel research! Whether it is research built on top of LangChain or that integrates with it.

 ## Further Reading
-
-To get started, let's learn [how to bootstrap a new integration package](./package.mdx) for LangChain.
+To get started, let's learn [how to implement an integration package](/docs/contributing/how_to/integrations/package/) for LangChain.
--- a/docs/docs/contributing/how_to/integrations/package.mdx
+++ b/docs/docs/contributing/how_to/integrations/package.mdx
@@ -2,23 +2,117 @@
 pagination_next: contributing/how_to/integrations/standard_tests
 pagination_prev: contributing/how_to/integrations/index
 ---
-# How to bootstrap a new integration package
+# How to implement an integration package

-This guide walks through the process of publishing a new LangChain integration 
-package to PyPi.
+This guide walks through the process of implementing a LangChain integration 
+package.

 Integration packages are just Python packages that can be installed with `pip install <your-package>`,
 which contain classes that are compatible with LangChain's core interfaces.

+We will cover:
+
+1. How to implement components, such as [chat models](/docs/concepts/chat_models/) and [vector stores](/docs/concepts/vectorstores/), that adhere
+to the LangChain interface;  
+2. (Optional) How to bootstrap a new integration package.
+
+## Implementing LangChain components
+
+LangChain components are subclasses of base classes in [langchain-core](/docs/concepts/architecture/#langchain-core).
+Examples include [chat models](/docs/concepts/chat_models/),
+[vector stores](/docs/concepts/vectorstores/), [tools](/docs/concepts/tools/),
+[embedding models](/docs/concepts/embedding_models/) and [retrievers](/docs/concepts/retrievers/).
+
+Your integration package will typically implement a subclass of at least one of these
+components. Expand the tabs below to see details on each.
+
+<details>
+    <summary>Chat models</summary>
+
+Refer to the [Custom Chat Model Guide](/docs/how_to/custom_chat_model) for
+details on a starter chat model [implementation](/docs/how_to/custom_chat_model/#implementation).
+
+:::tip
+
+The model from the [Custom Chat Model Guide](/docs/how_to/custom_chat_model) is tested
+against the standard unit and integration tests in the LangChain GitHub repository.
+You can also access that implementation directly from GitHub
+[here](https://github.com/langchain-ai/langchain/blob/master/libs/standard-tests/tests/unit_tests/custom_chat_model.py).
+
+:::
+
+</details>
+
+<details>
+    <summary>Vector stores</summary>
+
+Your vector store implementation will depend on your chosen database technology.
+`langchain-core` includes a minimal
+[in-memory vector store](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.in_memory.InMemoryVectorStore.html)
+that we can use as a guide. You can access the code [here](https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/vectorstores/in_memory.py).
+
+All vector stores must inherit from the [VectorStore](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html)
+base class. This interface consists of methods for writing, deleting and searching
+for documents in the vector store.
+
+`VectorStore` supports a variety of synchronous and asynchronous search types (e.g., 
+nearest-neighbor or maximum marginal relevance), as well as interfaces for adding
+documents to the store. See the [API Reference](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html)
+for all supported methods. The required methods are tabulated below:
+
+| Method/Property         | Description                                          |
+|------------------------ |------------------------------------------------------|
+| `add_documents`         | Add documents to the vector store.                   |
+| `delete`                | Delete selected documents from vector store (by IDs) |
+| `get_by_ids`            | Get selected documents from vector store (by IDs)    |
+| `similarity_search`     | Get documents most similar to a query.               |
+| `embeddings` (property) | Embeddings object for vector store.                  |
+| `from_texts`            | Instantiate vector store via adding texts.           |
+
+Note that `InMemoryVectorStore` implements some optional search types, as well as
+convenience methods for loading and dumping the object to a file, but this is not
+necessary for all implementations.
+
+:::tip
+
+The [in-memory vector store](https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/vectorstores/in_memory.py)
+is tested against the standard tests in the LangChain GitHub repository.
+
+:::
+
+</details>
+
+<!-- <details>
+<summary>Embeddings</summary>
+
+</details>
+
+<details>
+<summary>Tools</summary>
+
+</details>
+
+<details>
+<summary>Retrievers</summary>
+
+</details>
+
+<details>
+<summary>Document Loaders</summary>
+
+</details> -->
+
+## (Optional) bootstrapping a new integration package
+
 In this guide, we will be using [Poetry](https://python-poetry.org/) for
 dependency management and packaging, and you're welcome to use any other tools you prefer.

-## **Prerequisites**
+### **Prerequisites**

 - [GitHub](https://github.com) account
 - [PyPi](https://pypi.org/) account

-## Boostrapping a new Python package with Poetry
+### Bootstrapping a new Python package with Poetry

 First, install Poetry:

@@ -64,7 +158,7 @@ poetry install --with test

 You're now ready to start writing your integration package!

-## Writing your integration
+### Writing your integration

 Let's say you're building a simple integration package that provides a `ChatParrotLink`
 chat model integration for LangChain. Here's a simple example of what your project
@@ -86,183 +180,10 @@ All of these files should already exist from step 1, except for
 `chat_models.py` and `test_chat_models.py`! We will implement `test_chat_models.py` 
 later, following the [standard tests](../standard_tests) guide.

-To implement `chat_models.py`, let's copy the implementation from our
-[Custom Chat Model Guide](../../../../how_to/custom_chat_model).
+For `chat_models.py`, simply paste the contents of the chat model implementation
+[above](#implementing-langchain-components).

-<details>
-    <summary>chat_models.py</summary>
-```python title="langchain_parrot_link/chat_models.py"
-from typing import Any, Dict, Iterator, List, Optional
-
-from langchain_core.callbacks import (
-    CallbackManagerForLLMRun,
-)
-from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import (
-    AIMessage,
-    AIMessageChunk,
-    BaseMessage,
-)
-from langchain_core.messages.ai import UsageMetadata
-from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
-from pydantic import Field
-
-
-class ChatParrotLink(BaseChatModel):
-    """A custom chat model that echoes the first `parrot_buffer_length` characters
-    of the input.
-
-    When contributing an implementation to LangChain, carefully document
-    the model including the initialization parameters, include
-    an example of how to initialize the model and include any relevant
-    links to the underlying models documentation or API.
-
-    Example:
-
-        .. code-block:: python
-
-            model = ChatParrotLink(parrot_buffer_length=2, model="bird-brain-001")
-            result = model.invoke([HumanMessage(content="hello")])
-            result = model.batch([[HumanMessage(content="hello")],
-                                 [HumanMessage(content="world")]])
-    """
-
-    model_name: str = Field(alias="model")
-    """The name of the model"""
-    parrot_buffer_length: int
-    """The number of characters from the last message of the prompt to be echoed."""
-    temperature: Optional[float] = None
-    max_tokens: Optional[int] = None
-    timeout: Optional[int] = None
-    stop: Optional[List[str]] = None
-    max_retries: int = 2
-
-    def _generate(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> ChatResult:
-        """Override the _generate method to implement the chat model logic.
-
-        This can be a call to an API, a call to a local model, or any other
-        implementation that generates a response to the input prompt.
-
-        Args:
-            messages: the prompt composed of a list of messages.
-            stop: a list of strings on which the model should stop generating.
-                  If generation stops due to a stop token, the stop token itself
-                  SHOULD BE INCLUDED as part of the output. This is not enforced
-                  across models right now, but it's a good practice to follow since
-                  it makes it much easier to parse the output of the model
-                  downstream and understand why generation stopped.
-            run_manager: A run manager with callbacks for the LLM.
-        """
-        # Replace this with actual logic to generate a response from a list
-        # of messages.
-        last_message = messages[-1]
-        tokens = last_message.content[: self.parrot_buffer_length]
-        ct_input_tokens = sum(len(message.content) for message in messages)
-        ct_output_tokens = len(tokens)
-        message = AIMessage(
-            content=tokens,
-            additional_kwargs={},  # Used to add additional payload to the message
-            response_metadata={  # Use for response metadata
-                "time_in_seconds": 3,
-            },
-            usage_metadata={
-                "input_tokens": ct_input_tokens,
-                "output_tokens": ct_output_tokens,
-                "total_tokens": ct_input_tokens + ct_output_tokens,
-            },
-        )
-        ##
-
-        generation = ChatGeneration(message=message)
-        return ChatResult(generations=[generation])
-
-    def _stream(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> Iterator[ChatGenerationChunk]:
-        """Stream the output of the model.
-
-        This method should be implemented if the model can generate output
-        in a streaming fashion. If the model does not support streaming,
-        do not implement it. In that case streaming requests will be automatically
-        handled by the _generate method.
-
-        Args:
-            messages: the prompt composed of a list of messages.
-            stop: a list of strings on which the model should stop generating.
-                  If generation stops due to a stop token, the stop token itself
-                  SHOULD BE INCLUDED as part of the output. This is not enforced
-                  across models right now, but it's a good practice to follow since
-                  it makes it much easier to parse the output of the model
-                  downstream and understand why generation stopped.
-            run_manager: A run manager with callbacks for the LLM.
-        """
-        last_message = messages[-1]
-        tokens = str(last_message.content[: self.parrot_buffer_length])
-        ct_input_tokens = sum(len(message.content) for message in messages)
-
-        for token in tokens:
-            usage_metadata = UsageMetadata(
-                {
-                    "input_tokens": ct_input_tokens,
-                    "output_tokens": 1,
-                    "total_tokens": ct_input_tokens + 1,
-                }
-            )
-            ct_input_tokens = 0
-            chunk = ChatGenerationChunk(
-                message=AIMessageChunk(content=token, usage_metadata=usage_metadata)
-            )
-
-            if run_manager:
-                # This is optional in newer versions of LangChain
-                # The on_llm_new_token will be called automatically
-                run_manager.on_llm_new_token(token, chunk=chunk)
-
-            yield chunk
-
-        # Let's add some other information (e.g., response metadata)
-        chunk = ChatGenerationChunk(
-            message=AIMessageChunk(content="", response_metadata={"time_in_sec": 3})
-        )
-        if run_manager:
-            # This is optional in newer versions of LangChain
-            # The on_llm_new_token will be called automatically
-            run_manager.on_llm_new_token(token, chunk=chunk)
-        yield chunk
-
-    @property
-    def _llm_type(self) -> str:
-        """Get the type of language model used by this chat model."""
-        return "echoing-chat-model-advanced"
-
-    @property
-    def _identifying_params(self) -> Dict[str, Any]:
-        """Return a dictionary of identifying parameters.
-
-        This information is used by the LangChain callback system, which
-        is used for tracing purposes make it possible to monitor LLMs.
-        """
-        return {
-            # The model name allows users to specify custom token counting
-            # rules in LLM monitoring applications (e.g., in LangSmith users
-            # can provide per token pricing for their model and monitor
-            # costs for the given LLM.)
-            "model_name": self.model_name,
-        }
-```
-</details>
-
-## Push your package to a public Github repository
+### Push your package to a public Github repository

 This is only required if you want to publish your integration in the LangChain documentation.

--- a/docs/docs/contributing/how_to/integrations/standard_tests.ipynb
+++ b/docs/docs/contributing/how_to/integrations/standard_tests.ipynb
@@ -219,6 +219,41 @@
    "    <p>Note: The standard tests for chat models are implemented in the example in the main body of this guide too.</p>"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Chat model standard tests test a range of behaviors, from the most basic requirements (generating a response to a query) to optional capabilities like multi-modal support and tool-calling. For a test run to be successful:\n",
+    "\n",
+    "1. If a feature is intended to be supported by the model, it should pass;\n",
+    "2. If a feature is not intended to be supported by the model, it should be skipped.\n",
+    "\n",
+    "Tests for \"optional\" capabilities are controlled via a set of properties that can be overridden on the test model subclass.\n",
+    "\n",
+    "You can see the entire list of properties in the API reference [here](https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelTests.html). These properties are shared by both unit and integration tests.\n",
+    "\n",
+    "For example, to enable integration tests for image inputs, we can implement\n",
+    "\n",
+    "```python\n",
+    "@property\n",
+    "def supports_image_inputs(self) -> bool:\n",
+    "    return True\n",
+    "```\n",
+    "\n",
+    "on the integration test class.\n",
+    "\n",
+    ":::note\n",
+    "\n",
+    "Details on what tests are run, how each test can be skipped, and troubleshooting tips for each test can be found in the API references. See details:\n",
+    "\n",
+    "- [Unit tests API reference](https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelUnitTests.html)\n",
+    "- [Integration tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.html)\n",
+    "\n",
+    ":::\n",
+    "\n",
+    "Unit test example:"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -246,6 +281,13 @@
    "        }"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Integration test example:"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -418,6 +460,14 @@
    "    <summary>Vector Stores</summary>"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here's how you would configure the standard tests for a typical vector store (using\n",
+    "`ParrotVectorStore` as a placeholder):"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -465,6 +515,59 @@
    "            pass"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There are separate suites for testing synchronous and asynchronous methods.\n",
+    "Configuring the tests consists of implementing pytest fixtures for setting up an\n",
+    "empty vector store and tearing down the vector store after the test run ends.\n",
+    "\n",
+    "For example, below is the `ReadWriteTestSuite` for the [Chroma](https://python.langchain.com/docs/integrations/vectorstores/chroma/)\n",
+    "integration:\n",
+    "\n",
+    "```python\n",
+    "from typing import Generator\n",
+    "\n",
+    "import pytest\n",
+    "from langchain_core.vectorstores import VectorStore\n",
+    "from langchain_tests.integration_tests.vectorstores import ReadWriteTestSuite\n",
+    "\n",
+    "from langchain_chroma import Chroma\n",
+    "\n",
+    "\n",
+    "class TestSync(ReadWriteTestSuite):\n",
+    "    @pytest.fixture()\n",
+    "    def vectorstore(self) -> Generator[VectorStore, None, None]:  # type: ignore\n",
+    "        \"\"\"Get an empty vectorstore.\"\"\"\n",
+    "        store = Chroma(embedding_function=self.get_embeddings())\n",
+    "        try:\n",
+    "            yield store\n",
+    "        finally:\n",
+    "            store.delete_collection()\n",
+    "            pass\n",
+    "```\n",
+    "\n",
+    "Note that before the initial `yield`, we instantiate the vector store with an\n",
+    "[embeddings](/docs/concepts/embedding_models/) object. This is a pre-defined\n",
+    "[\"fake\" embeddings model](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.ReadWriteTestSuite.html#langchain_tests.integration_tests.vectorstores.ReadWriteTestSuite.get_embeddings)\n",
+    "that will generate short, arbitrary vectors for documents. You can use a different\n",
+    "embeddings object if desired.\n",
+    "\n",
+    "In the `finally` block, we call whatever integration-specific logic is needed to\n",
+    "bring the vector store to a clean state. This logic is executed after each test,\n",
+    "even if the test fails.\n",
+    "\n",
+    ":::note\n",
+    "\n",
+    "Details on what tests are run, how each test can be skipped, and troubleshooting tips for each test can be found in the API references. See details:\n",
+    "\n",
+    "- [Sync tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.ReadWriteTestSuite.html)\n",
+    "- [Async tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.AsyncReadWriteTestSuite.html)\n",
+    "\n",
+    ":::"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
--- a/docs/docs/how_to/custom_tools.ipynb
+++ b/docs/docs/how_to/custom_tools.ipynb
@@ -162,7 +162,7 @@
    "\n",
    "@tool\n",
    "def multiply_by_max(\n",
-    "    a: Annotated[str, \"scale factor\"],\n",
+    "    a: Annotated[int, \"scale factor\"],\n",
    "    b: Annotated[List[int], \"list of ints over which to take maximum\"],\n",
    ") -> int:\n",
    "    \"\"\"Multiply a by the maximum of b.\"\"\"\n",
--- a/docs/docs/how_to/graph_constructing.ipynb
+++ b/docs/docs/how_to/graph_constructing.ipynb
@@ -245,7 +245,7 @@
    "    allowed_nodes=[\"Person\", \"Country\", \"Organization\"],\n",
    "    allowed_relationships=allowed_relationships,\n",
    ")\n",
-    "llm_transformer_tuple = llm_transformer_filtered.convert_to_graph_documents(documents)\n",
+    "graph_documents_filtered = llm_transformer_tuple.convert_to_graph_documents(documents)\n",
    "print(f\"Nodes:{graph_documents_filtered[0].nodes}\")\n",
    "print(f\"Relationships:{graph_documents_filtered[0].relationships}\")"
   ]
--- a/docs/docs/how_to/graph_semantic.ipynb
+++ b/docs/docs/how_to/graph_semantic.ipynb
--- a/docs/docs/how_to/installation.mdx
+++ b/docs/docs/how_to/installation.mdx
@@ -51,7 +51,7 @@ pip install langchain-core

 Certain integrations like OpenAI and Anthropic have their own packages.
 Any integrations that require their own package will be documented as such in the [Integration docs](/docs/integrations/providers/).
-You can see a list of all integration packages in the [API reference](https://api.python.langchain.com) under the "Partner libs" dropdown.
+You can see a list of all integration packages in the [API reference](https://python.langchain.com/api_reference/) under the "Partner libs" dropdown.
 To install one of these run:

 ```bash
--- a/docs/docs/integrations/chat/cerebras.ipynb
+++ b/docs/docs/integrations/chat/cerebras.ipynb
@@ -17,7 +17,7 @@
   "source": [
    "# ChatCerebras\n",
    "\n",
-    "This notebook provides a quick overview for getting started with Cerebras [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatCerebras features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_cerebras.chat_models.ChatCerebras.html).\n",
+    "This notebook provides a quick overview for getting started with Cerebras [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatCerebras features and configurations head to the [API reference](https://python.langchain.com/api_reference/cerebras/chat_models/langchain_cerebras.chat_models.ChatCerebras.html#).\n",
    "\n",
    "At Cerebras, we've developed the world's largest and fastest AI processor, the Wafer-Scale Engine-3 (WSE-3). The Cerebras CS-3 system, powered by the WSE-3, represents a new class of AI supercomputer that sets the standard for generative AI training and inference with unparalleled performance and scalability.\n",
    "\n",
@@ -37,7 +37,7 @@
    "\n",
    "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/cerebras) | Package downloads | Package latest |\n",
    "| :--- | :--- | :---: | :---: |  :---: | :---: | :---: |\n",
-    "| [ChatCerebras](https://api.python.langchain.com/en/latest/chat_models/langchain_cerebras.chat_models.ChatCerebras.html) | [langchain-cerebras](https://api.python.langchain.com/en/latest/cerebras_api_reference.html) | ❌ | beta | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-cerebras?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-cerebras?style=flat-square&label=%20) |\n",
+    "| [ChatCerebras](https://python.langchain.com/api_reference/cerebras/chat_models/langchain_cerebras.chat_models.ChatCerebras.html#) | [langchain-cerebras](https://python.langchain.com/api_reference/cerebras/index.html) | ❌ | beta | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-cerebras?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-cerebras?style=flat-square&label=%20) |\n",
    "\n",
    "### Model features\n",
    "| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
@@ -396,7 +396,7 @@
   "source": [
    "## API reference\n",
    "\n",
-    "For detailed documentation of all ChatCerebras features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_cerebras.chat_models.ChatCerebras.html"
+    "For detailed documentation of all ChatCerebras features and configurations head to the API reference: https://python.langchain.com/api_reference/cerebras/chat_models/langchain_cerebras.chat_models.ChatCerebras.html#"
   ]
  }
 ],
--- a/docs/docs/integrations/chat/mistralai.ipynb
+++ b/docs/docs/integrations/chat/mistralai.ipynb
@@ -39,7 +39,7 @@
    "### Credentials\n",
    "\n",
    "\n",
-    "A valid [API key](https://console.mistral.ai/users/api-keys/) is needed to communicate with the API. Once you've done this set the MISTRAL_API_KEY environment variable:"
+    "A valid [API key](https://console.mistral.ai/api-keys/) is needed to communicate with the API. Once you've done this set the MISTRAL_API_KEY environment variable:"
   ]
  },
  {
--- a/docs/docs/integrations/chat/oci_data_science.ipynb
+++ b/docs/docs/integrations/chat/oci_data_science.ipynb
@@ -19,7 +19,7 @@
   "source": [
    "# ChatOCIModelDeployment\n",
    "\n",
-    "This will help you getting started with OCIModelDeployment [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatOCIModelDeployment features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.ChatOCIModelDeployment.html).\n",
+    "This will help you get started with OCIModelDeployment [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatOCIModelDeployment features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.oci_data_science.ChatOCIModelDeployment.html).\n",
    "\n",
    "[OCI Data Science](https://docs.oracle.com/en-us/iaas/data-science/using/home.htm) is a fully managed and serverless platform for data science teams to build, train, and manage machine learning models in the Oracle Cloud Infrastructure. You can use [AI Quick Actions](https://blogs.oracle.com/ai-and-datascience/post/ai-quick-actions-in-oci-data-science) to easily deploy LLMs on [OCI Data Science Model Deployment Service](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm). You may choose to deploy the model with popular inference frameworks such as vLLM or TGI. By default, the model deployment endpoint mimics the OpenAI API protocol.\n",
    "\n",
@@ -30,7 +30,7 @@
    "\n",
    "| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
    "| :--- | :--- | :---: | :---: |  :---: | :---: | :---: |\n",
-    "| [ChatOCIModelDeployment](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.ChatOCIModelDeployment.html) | [langchain-community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ❌ | beta | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-community?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-community?style=flat-square&label=%20) |\n",
+    "| [ChatOCIModelDeployment](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.oci_data_science.ChatOCIModelDeployment.html) | [langchain-community](https://python.langchain.com/api_reference/community/index.html) | ❌ | beta | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-community?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-community?style=flat-square&label=%20) |\n",
    "\n",
    "### Model features\n",
    "\n",
@@ -430,9 +430,9 @@
    "\n",
    "For comprehensive details on all features and configurations, please refer to the API reference documentation for each class:\n",
    "\n",
-    "* [ChatOCIModelDeployment](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.oci_data_science.ChatOCIModelDeployment.html)\n",
-    "* [ChatOCIModelDeploymentVLLM](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.oci_data_science.ChatOCIModelDeploymentVLLM.html)\n",
-    "* [ChatOCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.oci_data_science.ChatOCIModelDeploymentTGI.html)"
+    "* [ChatOCIModelDeployment](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.oci_data_science.ChatOCIModelDeployment.html)\n",
+    "* [ChatOCIModelDeploymentVLLM](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.oci_data_science.ChatOCIModelDeploymentVLLM.html)\n",
+    "* [ChatOCIModelDeploymentTGI](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.oci_data_science.ChatOCIModelDeploymentTGI.html)"
   ]
  }
 ],
--- a/docs/docs/integrations/chat/outlines.ipynb
+++ b/docs/docs/integrations/chat/outlines.ipynb
@@ -17,7 +17,7 @@
            "source": [
                "# ChatOutlines\n",
                "\n",
-                "This will help you getting started with Outlines [chat models](/docs/concepts/chat_models/). For detailed documentation of all ChatOutlines features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/outlines.chat_models.ChatOutlines.html).\n",
+                "This will help you get started with Outlines [chat models](/docs/concepts/chat_models/). For detailed documentation of all ChatOutlines features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.outlines.ChatOutlines.html).\n",
                "\n",
                "[Outlines](https://github.com/outlines-dev/outlines) is a library for constrained language generation. It allows you to use large language models (LLMs) with various backends while applying constraints to the generated output.\n",
                "\n",
@@ -26,7 +26,7 @@
                "\n",
                "| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
                "| :--- | :--- | :---: | :---: |  :---: | :---: | :---: |\n",
-                "| [ChatOutlines](https://api.python.langchain.com/en/latest/chat_models/outlines.chat_models.ChatOutlines.html) | [langchain-community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-community?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-community?style=flat-square&label=%20) |\n",
+                "| [ChatOutlines](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.outlines.ChatOutlines.html) | [langchain-community](https://python.langchain.com/api_reference/community/index.html) | ✅ | ❌ | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-community?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-community?style=flat-square&label=%20) |\n",
                "\n",
                "### Model features\n",
                "| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
@@ -316,7 +316,7 @@
            "source": [
                "## API reference\n",
                "\n",
-                "For detailed documentation of all ChatOutlines features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/outlines.chat_models.ChatOutlines.html\n",
+                "For detailed documentation of all ChatOutlines features and configurations head to the API reference: https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.outlines.ChatOutlines.html\n",
                "\n",
                "## Full Outlines Documentation: \n",
                "\n",
--- a/docs/docs/integrations/chat/sambastudio.ipynb
+++ b/docs/docs/integrations/chat/sambastudio.ipynb
@@ -19,7 +19,7 @@
   "source": [
    "# ChatSambaStudio\n",
    "\n",
-    "This will help you getting started with SambaStudio [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatStudio features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.sambanova.ChatSambaStudio.html).\n",
+    "This will help you get started with SambaStudio [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatSambaStudio features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.sambanova.ChatSambaStudio.html).\n",
    "\n",
    "**[SambaNova](https://sambanova.ai/)'s** [SambaStudio](https://docs.sambanova.ai/sambastudio/latest/sambastudio-intro.html) SambaStudio is a rich, GUI-based platform that provides the functionality to train, deploy, and manage models in SambaNova [DataScale](https://sambanova.ai/products/datascale) systems.\n",
    "\n",
@@ -28,7 +28,7 @@
    "\n",
    "| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
    "| :--- | :--- | :---: | :---: |  :---: | :---: | :---: |\n",
-    "| [ChatSambaStudio](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.sambanova.ChatSambaStudio.html) | [langchain-community](https://python.langchain.com/api_reference/community/index.html) | ❌ | ❌ | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain_community?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain_community?style=flat-square&label=%20) |\n",
+    "| [ChatSambaStudio](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.sambanova.ChatSambaStudio.html) | [langchain-community](https://python.langchain.com/api_reference/community/index.html) | ❌ | ❌ | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain_community?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain_community?style=flat-square&label=%20) |\n",
    "\n",
    "### Model features\n",
    "\n",
@@ -355,7 +355,7 @@
   "source": [
    "## API reference\n",
    "\n",
-    "For detailed documentation of all ChatSambaStudio features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.sambanova.ChatSambaStudio.html"
+    "For detailed documentation of all ChatSambaStudio features and configurations head to the API reference: https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.sambanova.ChatSambaStudio.html"
   ]
  }
 ],
--- a/docs/docs/integrations/document_loaders/needle.ipynb
+++ b/docs/docs/integrations/document_loaders/needle.ipynb
@@ -0,0 +1,253 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Needle Document Loader\n",
+    "[Needle](https://needle-ai.com) makes it easy to create your RAG pipelines with minimal effort. \n",
+    "\n",
+    "For more details, refer to our [API documentation](https://docs.needle-ai.com/docs/api-reference/needle-api)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Overview\n",
+    "The Needle Document Loader is a utility for integrating Needle collections with LangChain. It enables seamless storage, retrieval, and utilization of documents for Retrieval-Augmented Generation (RAG) workflows.\n",
+    "\n",
+    "This example demonstrates:\n",
+    "\n",
+    "* Storing documents into a Needle collection.\n",
+    "* Setting up a retriever to fetch documents.\n",
+    "* Building a Retrieval-Augmented Generation (RAG) pipeline."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Setup\n",
+    "Before starting, ensure you have the following environment variables set:\n",
+    "\n",
+    "* NEEDLE_API_KEY: Your API key for authenticating with Needle.\n",
+    "* OPENAI_API_KEY: Your OpenAI API key for language model operations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"NEEDLE_API_KEY\"] = \"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"OPENAI_API_KEY\"] = \"\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialization\n",
+    "To initialize the NeedleLoader, you need the following parameters:\n",
+    "\n",
+    "* needle_api_key: Your Needle API key (or set it as an environment variable).\n",
+    "* collection_id: The ID of the Needle collection to work with."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Instantiation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.document_loaders.needle import NeedleLoader\n",
+    "\n",
+    "collection_id = \"clt_01J87M9T6B71DHZTHNXYZQRG5H\"\n",
+    "\n",
+    "# Initialize NeedleLoader to store documents to the collection\n",
+    "document_loader = NeedleLoader(\n",
+    "    needle_api_key=os.getenv(\"NEEDLE_API_KEY\"),\n",
+    "    collection_id=collection_id,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load\n",
+    "To add files to the Needle collection:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "files = {\n",
+    "    \"tech-radar-30.pdf\": \"https://www.thoughtworks.com/content/dam/thoughtworks/documents/radar/2024/04/tr_technology_radar_vol_30_en.pdf\"\n",
+    "}\n",
+    "\n",
+    "document_loader.add_files(files=files)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show the documents in the collection\n",
+    "# collections_documents = document_loader.load()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Lazy Load\n",
+    "The lazy_load method allows you to iteratively load documents from the Needle collection, yielding each document as it is fetched:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show the documents in the collection\n",
+    "# collections_documents = document_loader.lazy_load()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Usage\n",
+    "### Use within a chain\n",
+    "Below is a complete example of setting up a RAG pipeline with Needle within a chain:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'input': 'Did RAG move to accepted?',\n",
+       " 'context': [Document(metadata={}, page_content='New Moved in/out No change\\n\\n© Thoughtworks, Inc. All Rights Reserved. 12\\n\\nTechniques\\n\\n1. Retrieval-augmented generation (RAG)\\nAdopt\\n\\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of \\nresponses generated by a large language model (LLM). We’ve successfully used it in several projects, \\nincluding the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy \\ndocuments — in formats like HTML and PDF — are stored in databases that supports a vector data \\ntype or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For \\na given prompt, the database is queried to retrieve relevant documents, which are then combined \\nwith the prompt to provide richer context to the LLM. This results in higher quality output and greatly \\nreduced hallucinations. The context window — which determines the maximum size of the LLM input \\n— is limited, which means that selecting the most relevant documents is crucial. We improve the \\nrelevancy of the content that is added to the prompt by reranking. Similarly, the documents are usually \\ntoo large to calculate an embedding, which means they must be split into smaller chunks. This is often \\na difficult problem, and one approach is to have the chunks overlap to a certain extent.'),\n",
+       "  Document(metadata={}, page_content='New Moved in/out No change\\n\\n© Thoughtworks, Inc. All Rights Reserved. 12\\n\\nTechniques\\n\\n1. Retrieval-augmented generation (RAG)\\nAdopt\\n\\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of \\nresponses generated by a large language model (LLM). We’ve successfully used it in several projects, \\nincluding the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy \\ndocuments — in formats like HTML and PDF — are stored in databases that supports a vector data \\ntype or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For \\na given prompt, the database is queried to retrieve relevant documents, which are then combined \\nwith the prompt to provide richer context to the LLM. This results in higher quality output and greatly \\nreduced hallucinations. The context window — which determines the maximum size of the LLM input \\n— is limited, which means that selecting the most relevant documents is crucial. We improve the \\nrelevancy of the content that is added to the prompt by reranking. Similarly, the documents are usually \\ntoo large to calculate an embedding, which means they must be split into smaller chunks. This is often \\na difficult problem, and one approach is to have the chunks overlap to a certain extent.'),\n",
+       "  Document(metadata={}, page_content='New Moved in/out No change\\n\\n© Thoughtworks, Inc. All Rights Reserved. 12\\n\\nTechniques\\n\\n1. Retrieval-augmented generation (RAG)\\nAdopt\\n\\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of \\nresponses generated by a large language model (LLM). We’ve successfully used it in several projects, \\nincluding the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy \\ndocuments — in formats like HTML and PDF — are stored in databases that supports a vector data \\ntype or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For \\na given prompt, the database is queried to retrieve relevant documents, which are then combined \\nwith the prompt to provide richer context to the LLM. This results in higher quality output and greatly \\nreduced hallucinations. The context window — which determines the maximum size of the LLM input \\n— is limited, which means that selecting the most relevant documents is crucial. We improve the \\nrelevancy of the content that is added to the prompt by reranking. Similarly, the documents are usually \\ntoo large to calculate an embedding, which means they must be split into smaller chunks. This is often \\na difficult problem, and one approach is to have the chunks overlap to a certain extent.'),\n",
+       "  Document(metadata={}, page_content='New Moved in/out No change\\n\\n© Thoughtworks, Inc. All Rights Reserved. 12\\n\\nTechniques\\n\\n1. Retrieval-augmented generation (RAG)\\nAdopt\\n\\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of \\nresponses generated by a large language model (LLM). We’ve successfully used it in several projects, \\nincluding the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy \\ndocuments — in formats like HTML and PDF — are stored in databases that supports a vector data \\ntype or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For \\na given prompt, the database is queried to retrieve relevant documents, which are then combined \\nwith the prompt to provide richer context to the LLM. This results in higher quality output and greatly \\nreduced hallucinations. The context window — which determines the maximum size of the LLM input \\n— is limited, which means that selecting the most relevant documents is crucial. We improve the \\nrelevancy of the content that is added to the prompt by reranking. Similarly, the documents are usually \\ntoo large to calculate an embedding, which means they must be split into smaller chunks. This is often \\na difficult problem, and one approach is to have the chunks overlap to a certain extent.'),\n",
+       "  Document(metadata={}, page_content='New Moved in/out No change\\n\\n© Thoughtworks, Inc. All Rights Reserved. 12\\n\\nTechniques\\n\\n1. Retrieval-augmented generation (RAG)\\nAdopt\\n\\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of \\nresponses generated by a large language model (LLM). We’ve successfully used it in several projects, \\nincluding the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy \\ndocuments — in formats like HTML and PDF — are stored in databases that supports a vector data \\ntype or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For \\na given prompt, the database is queried to retrieve relevant documents, which are then combined \\nwith the prompt to provide richer context to the LLM. This results in higher quality output and greatly \\nreduced hallucinations. The context window — which determines the maximum size of the LLM input \\n— is limited, which means that selecting the most relevant documents is crucial. We improve the \\nrelevancy of the content that is added to the prompt by reranking. Similarly, the documents are usually \\ntoo large to calculate an embedding, which means they must be split into smaller chunks. This is often \\na difficult problem, and one approach is to have the chunks overlap to a certain extent.')],\n",
+       " 'answer': 'Yes, RAG has been adopted as the preferred pattern for improving the quality of responses generated by a large language model.'}"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "from langchain.chains import create_retrieval_chain\n",
+    "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
+    "from langchain_community.retrievers.needle import NeedleRetriever\n",
+    "from langchain_core.prompts import ChatPromptTemplate\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
+    "llm = ChatOpenAI(temperature=0)\n",
+    "\n",
+    "# Initialize the Needle retriever (make sure your Needle API key is set as an environment variable)\n",
+    "retriever = NeedleRetriever(\n",
+    "    needle_api_key=os.getenv(\"NEEDLE_API_KEY\"),\n",
+    "    collection_id=\"clt_01J87M9T6B71DHZTHNXYZQRG5H\",\n",
+    ")\n",
+    "\n",
+    "# Define system prompt for the assistant\n",
+    "system_prompt = \"\"\"\n",
+    "    You are an assistant for question-answering tasks. \n",
+    "    Use the following pieces of retrieved context to answer the question.\n",
+    "    If you don't know, say so concisely.\\n\\n{context}\n",
+    "    \"\"\"\n",
+    "\n",
+    "prompt = ChatPromptTemplate.from_messages(\n",
+    "    [(\"system\", system_prompt), (\"human\", \"{input}\")]\n",
+    ")\n",
+    "\n",
+    "# Define the question-answering chain using a document chain (stuff chain) and the retriever\n",
+    "question_answer_chain = create_stuff_documents_chain(llm, prompt)\n",
+    "\n",
+    "# Create the RAG (Retrieval-Augmented Generation) chain by combining the retriever and the question-answering chain\n",
+    "rag_chain = create_retrieval_chain(retriever, question_answer_chain)\n",
+    "\n",
+    "# Define the input query\n",
+    "query = {\"input\": \"Did RAG move to accepted?\"}\n",
+    "\n",
+    "response = rag_chain.invoke(query)\n",
+    "\n",
+    "response"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## API reference\n",
+    "\n",
+    "For detailed documentation of all `Needle` features and configurations head to the API reference: https://docs.needle-ai.com"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/docs/docs/integrations/document_loaders/sitemap.ipynb
+++ b/docs/docs/integrations/document_loaders/sitemap.ipynb
@@ -103,7 +103,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
--- a/docs/docs/integrations/document_loaders/tomarkdown.ipynb
+++ b/docs/docs/integrations/document_loaders/tomarkdown.ipynb
@@ -153,7 +153,7 @@
      "\n",
      "Best practices for developing with LangChain.\n",
      "\n",
-      "### [API reference](https://api.python.langchain.com) [](\\#api-reference \"Direct link to api-reference\")\n",
+      "### [API reference](https://python.langchain.com/api_reference/) [](\\#api-reference \"Direct link to api-reference\")\n",
      "\n",
      "Head to the reference section for full documentation of all classes and methods in the LangChain and LangChain Experimental Python packages.\n",
      "\n",
--- a/docs/docs/integrations/llms/ai21.ipynb
+++ b/docs/docs/integrations/llms/ai21.ipynb
@@ -17,6 +17,9 @@
   "source": [
    "# AI21LLM\n",
    "\n",
+    ":::caution This service is deprecated.\n",
+    "See [this page](https://python.langchain.com/docs/integrations/chat/ai21/) for the updated ChatAI21 object.\n",
+    ":::\n",
+    "\n",
    "This example goes over how to use LangChain to interact with `AI21` Jurassic models. To use the Jamba model, use the [ChatAI21 object](https://python.langchain.com/docs/integrations/chat/ai21/) instead.\n",
    "\n",
    "[See a full list of AI21 models and tools on LangChain.](https://pypi.org/project/langchain-ai21/)\n",
--- a/docs/docs/integrations/llms/oci_model_deployment_endpoint.ipynb
+++ b/docs/docs/integrations/llms/oci_model_deployment_endpoint.ipynb
@@ -180,9 +180,9 @@
    "\n",
    "For comprehensive details on all features and configurations, please refer to the API reference documentation for each class:\n",
    "\n",
-    "* [OCIModelDeploymentLLM](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentLLM.html)\n",
-    "* [OCIModelDeploymentVLLM](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentVLLM.html)\n",
-    "* [OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html)"
+    "* [OCIModelDeploymentLLM](https://python.langchain.com/api_reference/community/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentLLM.html)\n",
+    "* [OCIModelDeploymentVLLM](https://python.langchain.com/api_reference/community/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentVLLM.html)\n",
+    "* [OCIModelDeploymentTGI](https://python.langchain.com/api_reference/community/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html)"
   ]
  }
 ],
--- a/docs/docs/integrations/llms/outlines.ipynb
+++ b/docs/docs/integrations/llms/outlines.ipynb
@@ -6,7 +6,7 @@
   "source": [
    "# Outlines\n",
    "\n",
-    "This will help you getting started with Outlines LLM. For detailed documentation of all Outlines features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/llms/outlines.llms.Outlines.html).\n",
+    "This will help you get started with Outlines LLM. For detailed documentation of all Outlines features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/llms/langchain_community.llms.outlines.Outlines.html).\n",
    "\n",
    "[Outlines](https://github.com/outlines-dev/outlines) is a library for constrained language generation. It allows you to use large language models (LLMs) with various backends while applying constraints to the generated output.\n",
    "\n",
@@ -236,7 +236,7 @@
   "source": [
    "## API reference\n",
    "\n",
-    "For detailed documentation of all ChatOutlines features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/outlines.chat_models.ChatOutlines.html\n",
+    "For detailed documentation of all ChatOutlines features and configurations head to the API reference: https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.outlines.ChatOutlines.html\n",
    "\n",
    "## Outlines Documentation: \n",
    "\n",
--- a/docs/docs/integrations/providers/aerospike.mdx
+++ b/docs/docs/integrations/providers/aerospike.mdx
@@ -0,0 +1,24 @@
+# Aerospike
+
+>[Aerospike](https://aerospike.com/docs/vector) is a high-performance, distributed database known for its speed and scalability, now with support for vector storage and search, enabling retrieval and search of embedding vectors for machine learning and AI applications.
+> See the documentation for Aerospike Vector Search (AVS) [here](https://aerospike.com/docs/vector).
+
+## Installation and Setup
+
+Install the AVS Python SDK and AVS langchain vector store:
+
+```bash
+pip install aerospike-vector-search langchain-community
+```
+
+See the documentation for the Python SDK [here](https://aerospike-vector-search-python-client.readthedocs.io/en/latest/index.html).
+The documentation for the AVS langchain vector store is [here](https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.aerospike.Aerospike.html).
+
+## Vector Store
+
+To import this vectorstore:
+
+```python
+from langchain_community.vectorstores import Aerospike
+```
+
+See a usage example [here](https://python.langchain.com/docs/integrations/vectorstores/aerospike/).
+
--- a/docs/docs/integrations/providers/ai21.mdx
+++ b/docs/docs/integrations/providers/ai21.mdx
@@ -15,26 +15,6 @@ This page covers how to use the `AI21` ecosystem within `LangChain`.
 pip install langchain-ai21
 ```

-## LLMs
-
-See a [usage example](/docs/integrations/llms/ai21).
-
-### AI21 LLM
-
-```python
-from langchain_ai21 import AI21LLM
-```
-
-### AI21 Contextual Answer
-
-You can use AI21’s contextual answers model to receive text or document, 
-serving as a context, and a question and return an answer based entirely on this context.
-
-```python
-from langchain_ai21 import AI21ContextualAnswers
-```
-
-
 ## Chat models

 ### AI21 Chat 
@@ -45,23 +25,32 @@ See a [usage example](/docs/integrations/chat/ai21).
 from langchain_ai21 import ChatAI21
 ```

-## Embedding models
+## Deprecated features
+
+:::caution The following features are deprecated. 
+:::
+
+### AI21 LLM
+
+```python
+from langchain_ai21 import AI21LLM
+```
+
+### AI21 Contextual Answer
+
+```python
+from langchain_ai21 import AI21ContextualAnswers
+```

 ### AI21 Embeddings

-See a [usage example](/docs/integrations/text_embedding/ai21).
-
 ```python
 from langchain_ai21 import AI21Embeddings
 ```
-
 ## Text splitters

 ### AI21 Semantic Text Splitter

-See a [usage example](/docs/integrations/document_transformers/ai21_semantic_text_splitter).
-
 ```python
 from langchain_ai21 import AI21SemanticTextSplitter
-```
-
+```
--- a/docs/docs/integrations/providers/pinecone.mdx
+++ b/docs/docs/integrations/providers/pinecone.mdx
@@ -32,7 +32,7 @@ For a more detailed walkthrough of the Pinecone vectorstore, see [this notebook]
 ### Pinecone Hybrid Search

 ```bash
-pip install pinecone-client pinecone-text
+pip install pinecone pinecone-text
 ```

 ```python
--- a/docs/docs/integrations/providers/unstructured.mdx
+++ b/docs/docs/integrations/providers/unstructured.mdx
@@ -22,7 +22,7 @@ dependencies running.

 - To run everything locally, install the open-source python package with `pip install unstructured`
  along with `pip install langchain-community` and use the same `UnstructuredLoader` as mentioned above.
-    - You can install document specific dependencies with extras, e.g. `pip install "unstructured[docx]"`.
+    - You can install document specific dependencies with extras, e.g. `pip install "unstructured[docx]"`. Learn more about extras [here](https://docs.unstructured.io/open-source/installation/full-installation).
    - To install the dependencies for all document types, use `pip install "unstructured[all-docs]"`.
 - Install the following system dependencies if they are not already available on your system with e.g. `brew install` for Mac.
  Depending on what document types you're parsing, you may not need all of these.
--- a/docs/docs/integrations/retrievers/fleet_context.ipynb
+++ b/docs/docs/integrations/retrievers/fleet_context.ipynb
@@ -9,7 +9,7 @@
    "\n",
    ">[Fleet AI Context](https://www.fleet.so/context) is a dataset of high-quality embeddings of the top 1200 most popular & permissive Python Libraries & their documentation.\n",
    ">\n",
-    ">The `Fleet AI` team is on a mission to embed the world's most important data. They've started by embedding the top 1200 Python libraries to enable code generation with up-to-date knowledge. They've been kind enough to share their embeddings of the [LangChain docs](/docs/introduction) and [API reference](https://api.python.langchain.com/en/latest/api_reference.html).\n",
+    ">The `Fleet AI` team is on a mission to embed the world's most important data. They've started by embedding the top 1200 Python libraries to enable code generation with up-to-date knowledge. They've been kind enough to share their embeddings of the [LangChain docs](/docs/introduction) and [API reference](https://python.langchain.com/api_reference/).\n",
    "\n",
    "Let's take a look at how we can use these embeddings to power a docs retrieval system and ultimately a simple code-generating chain!"
   ]
--- a/docs/docs/integrations/retrievers/needle.ipynb
+++ b/docs/docs/integrations/retrievers/needle.ipynb
@@ -0,0 +1,235 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Needle Retriever\n",
+    "[Needle](https://needle-ai.com) makes it easy to create your RAG pipelines with minimal effort. \n",
+    "\n",
+    "For more details, refer to our [API documentation](https://docs.needle-ai.com/docs/api-reference/needle-api)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Overview\n",
+    "The Needle Document Loader is a utility for integrating Needle collections with LangChain. It enables seamless storage, retrieval, and utilization of documents for Retrieval-Augmented Generation (RAG) workflows.\n",
+    "\n",
+    "This example demonstrates:\n",
+    "\n",
+    "* Storing documents into a Needle collection.\n",
+    "* Setting up a retriever to fetch documents.\n",
+    "* Building a Retrieval-Augmented Generation (RAG) pipeline."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Setup\n",
+    "Before starting, ensure you have the following environment variables set:\n",
+    "\n",
+    "* NEEDLE_API_KEY: Your API key for authenticating with Needle.\n",
+    "* OPENAI_API_KEY: Your OpenAI API key for language model operations."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialization\n",
+    "To initialize the NeedleLoader, you need the following parameters:\n",
+    "\n",
+    "* needle_api_key: Your Needle API key (or set it as an environment variable).\n",
+    "* collection_id: The ID of the Needle collection to work with."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"NEEDLE_API_KEY\"] = \"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"OPENAI_API_KEY\"] = \"\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Instantiation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.document_loaders.needle import NeedleLoader\n",
+    "\n",
+    "collection_id = \"clt_01J87M9T6B71DHZTHNXYZQRG5H\"\n",
+    "\n",
+    "# Initialize NeedleLoader to store documents to the collection\n",
+    "document_loader = NeedleLoader(\n",
+    "    needle_api_key=os.getenv(\"NEEDLE_API_KEY\"),\n",
+    "    collection_id=collection_id,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load\n",
+    "To add files to the Needle collection:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "files = {\n",
+    "    \"tech-radar-30.pdf\": \"https://www.thoughtworks.com/content/dam/thoughtworks/documents/radar/2024/04/tr_technology_radar_vol_30_en.pdf\"\n",
+    "}\n",
+    "\n",
+    "document_loader.add_files(files=files)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show the documents in the collection\n",
+    "# collections_documents = document_loader.load()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Usage\n",
+    "### Use within a chain\n",
+    "Below is a complete example of setting up a RAG pipeline with Needle within a chain:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'input': 'Did RAG move to accepted?',\n",
+       " 'context': [Document(metadata={}, page_content='New Moved in/out No change\\n\\n© Thoughtworks, Inc. All Rights Reserved. 12\\n\\nTechniques\\n\\n1. Retrieval-augmented generation (RAG)\\nAdopt\\n\\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of \\nresponses generated by a large language model (LLM). We’ve successfully used it in several projects, \\nincluding the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy \\ndocuments — in formats like HTML and PDF — are stored in databases that supports a vector data \\ntype or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For \\na given prompt, the database is queried to retrieve relevant documents, which are then combined \\nwith the prompt to provide richer context to the LLM. This results in higher quality output and greatly \\nreduced hallucinations. The context window — which determines the maximum size of the LLM input \\n— is limited, which means that selecting the most relevant documents is crucial. We improve the \\nrelevancy of the content that is added to the prompt by reranking. Similarly, the documents are usually \\ntoo large to calculate an embedding, which means they must be split into smaller chunks. This is often \\na difficult problem, and one approach is to have the chunks overlap to a certain extent.'),\n",
+       "  Document(metadata={}, page_content='New Moved in/out No change\\n\\n© Thoughtworks, Inc. All Rights Reserved. 12\\n\\nTechniques\\n\\n1. Retrieval-augmented generation (RAG)\\nAdopt\\n\\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of \\nresponses generated by a large language model (LLM). We’ve successfully used it in several projects, \\nincluding the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy \\ndocuments — in formats like HTML and PDF — are stored in databases that supports a vector data \\ntype or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For \\na given prompt, the database is queried to retrieve relevant documents, which are then combined \\nwith the prompt to provide richer context to the LLM. This results in higher quality output and greatly \\nreduced hallucinations. The context window — which determines the maximum size of the LLM input \\n— is limited, which means that selecting the most relevant documents is crucial. We improve the \\nrelevancy of the content that is added to the prompt by reranking. Similarly, the documents are usually \\ntoo large to calculate an embedding, which means they must be split into smaller chunks. This is often \\na difficult problem, and one approach is to have the chunks overlap to a certain extent.'),\n",
+       "  Document(metadata={}, page_content='New Moved in/out No change\\n\\n© Thoughtworks, Inc. All Rights Reserved. 12\\n\\nTechniques\\n\\n1. Retrieval-augmented generation (RAG)\\nAdopt\\n\\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of \\nresponses generated by a large language model (LLM). We’ve successfully used it in several projects, \\nincluding the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy \\ndocuments — in formats like HTML and PDF — are stored in databases that supports a vector data \\ntype or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For \\na given prompt, the database is queried to retrieve relevant documents, which are then combined \\nwith the prompt to provide richer context to the LLM. This results in higher quality output and greatly \\nreduced hallucinations. The context window — which determines the maximum size of the LLM input \\n— is limited, which means that selecting the most relevant documents is crucial. We improve the \\nrelevancy of the content that is added to the prompt by reranking. Similarly, the documents are usually \\ntoo large to calculate an embedding, which means they must be split into smaller chunks. This is often \\na difficult problem, and one approach is to have the chunks overlap to a certain extent.'),\n",
+       "  Document(metadata={}, page_content='New Moved in/out No change\\n\\n© Thoughtworks, Inc. All Rights Reserved. 12\\n\\nTechniques\\n\\n1. Retrieval-augmented generation (RAG)\\nAdopt\\n\\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of \\nresponses generated by a large language model (LLM). We’ve successfully used it in several projects, \\nincluding the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy \\ndocuments — in formats like HTML and PDF — are stored in databases that supports a vector data \\ntype or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For \\na given prompt, the database is queried to retrieve relevant documents, which are then combined \\nwith the prompt to provide richer context to the LLM. This results in higher quality output and greatly \\nreduced hallucinations. The context window — which determines the maximum size of the LLM input \\n— is limited, which means that selecting the most relevant documents is crucial. We improve the \\nrelevancy of the content that is added to the prompt by reranking. Similarly, the documents are usually \\ntoo large to calculate an embedding, which means they must be split into smaller chunks. This is often \\na difficult problem, and one approach is to have the chunks overlap to a certain extent.'),\n",
+       "  Document(metadata={}, page_content='https://www.thoughtworks.com/radar/tools/nemo-guardrails\\nhttps://www.thoughtworks.com/radar/platforms/langfuse\\nhttps://www.thoughtworks.com/radar/techniques/retrieval-augmented-generation-rag\\nhttps://cruisecontrol.sourceforge.net/\\nhttps://martinfowler.com/articles/continuousIntegration.html\\nhttps://www.thoughtworks.com/radar/techniques/peer-review-equals-pull-request\\nhttps://martinfowler.com/bliki/ContinuousIntegrationCertification.html\\nhttps://linearb.io/platform/gitstream\\nhttps://www.thoughtworks.com/radar/tools/github-merge-queue\\nhttps://stacking.dev/\\n\\n© Thoughtworks, Inc. All Rights Reserved. 8\\n\\nHold HoldAssess AssessTrial TrialAdopt Adopt\\n\\n18\\n\\n8\\n\\n24\\n\\n29\\n\\n30\\n31\\n\\n32\\n33\\n\\n34 35\\n\\n36\\n37\\n\\n38 39\\n\\n40\\n41\\n\\n42\\n43\\n\\n26\\n\\n2\\n\\n3\\n\\n4\\n\\n5\\n\\n6 7\\n\\n9\\n\\n15\\n\\n16\\n\\n17\\n\\n10\\n\\n11\\n\\n12\\n\\n13 14\\n\\n44\\n\\n47\\n49\\n\\n50\\n\\n65\\n66\\n\\n67 68\\n69\\n\\n70\\n71\\n\\n72\\n\\n73 74\\n\\n75\\n\\n76 77\\n\\n78\\n79\\n\\n80\\n81\\n\\n82\\n\\n83\\n\\n51\\n\\n52 54\\n\\n59\\n\\n53\\n56\\n\\n58\\n\\n61\\n\\n62\\n63\\n\\n64\\n\\n85\\n\\n88 89\\n\\n90 91\\n\\n92\\n93\\n\\n94\\n95 96\\n\\n97\\n\\n98 99\\n\\n100\\n\\n101\\n102\\n\\n103\\n\\n104\\n\\n86\\n\\n87\\n1921\\n\\n22\\n\\n20\\n28\\n\\n25\\n\\n27\\n\\n23\\n\\n84\\n\\n105\\n\\n1\\n45\\n\\n46\\n\\n48\\n\\n55\\n57')],\n",
+       " 'answer': 'Yes, RAG has moved to the \"Adopt\" status.'}"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "from langchain.chains import create_retrieval_chain\n",
+    "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
+    "from langchain_community.retrievers.needle import NeedleRetriever\n",
+    "from langchain_core.prompts import ChatPromptTemplate\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
+    "llm = ChatOpenAI(temperature=0)\n",
+    "\n",
+    "# Initialize the Needle retriever (make sure your Needle API key is set as an environment variable)\n",
+    "retriever = NeedleRetriever(\n",
+    "    needle_api_key=os.getenv(\"NEEDLE_API_KEY\"),\n",
+    "    collection_id=\"clt_01J87M9T6B71DHZTHNXYZQRG5H\",\n",
+    ")\n",
+    "\n",
+    "# Define system prompt for the assistant\n",
+    "system_prompt = \"\"\"\n",
+    "    You are an assistant for question-answering tasks. \n",
+    "    Use the following pieces of retrieved context to answer the question.\n",
+    "    If you don't know, say so concisely.\\n\\n{context}\n",
+    "    \"\"\"\n",
+    "\n",
+    "prompt = ChatPromptTemplate.from_messages(\n",
+    "    [(\"system\", system_prompt), (\"human\", \"{input}\")]\n",
+    ")\n",
+    "\n",
+    "# Define the question-answering chain using a document chain (stuff chain) and the retriever\n",
+    "question_answer_chain = create_stuff_documents_chain(llm, prompt)\n",
+    "\n",
+    "# Create the RAG (Retrieval-Augmented Generation) chain by combining the retriever and the question-answering chain\n",
+    "rag_chain = create_retrieval_chain(retriever, question_answer_chain)\n",
+    "\n",
+    "# Define the input query\n",
+    "query = {\"input\": \"Did RAG move to accepted?\"}\n",
+    "\n",
+    "response = rag_chain.invoke(query)\n",
+    "\n",
+    "response"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## API reference\n",
+    "\n",
+    "For detailed documentation of all `Needle` features and configurations head to the API reference: https://docs.needle-ai.com"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/docs/docs/integrations/retrievers/pinecone_hybrid_search.ipynb
+++ b/docs/docs/integrations/retrievers/pinecone_hybrid_search.ipynb
@@ -24,7 +24,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "%pip install --upgrade --quiet  pinecone-client pinecone-text pinecone-notebooks"
+    "%pip install --upgrade --quiet  pinecone pinecone-text pinecone-notebooks"
   ]
  },
  {
--- a/docs/docs/integrations/text_embedding/ai21.ipynb
+++ b/docs/docs/integrations/text_embedding/ai21.ipynb
@@ -17,6 +17,8 @@
   "source": [
    "# AI21Embeddings\n",
    "\n",
+    ":::caution\n\nThis service is deprecated.\n\n:::\n",
+    "\n",
    "This will help you get started with AI21 embedding models using LangChain. For detailed documentation on `AI21Embeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/ai21/embeddings/langchain_ai21.embeddings.AI21Embeddings.html).\n",
    "\n",
    "## Overview\n",
--- a/docs/docs/integrations/text_embedding/databricks.ipynb
+++ b/docs/docs/integrations/text_embedding/databricks.ipynb
@@ -28,7 +28,7 @@
    "\n",
    "| Class | Package |\n",
    "| :--- | :--- |\n",
-    "| [DatabricksEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_databricks.embeddings.DatabricksEmbeddings.html) | [databricks-langchain](https://python.langchain.com/docs/integrations/providers/databricks/) |\n",
+    "| [DatabricksEmbeddings](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.databricks.DatabricksEmbeddings.html) | [databricks-langchain](https://python.langchain.com/docs/integrations/providers/databricks/) |\n",
    "\n",
    "### Supported Methods\n",
    "\n",
--- a/docs/docs/integrations/text_embedding/zhipuai.ipynb
+++ b/docs/docs/integrations/text_embedding/zhipuai.ipynb
@@ -251,7 +251,7 @@
   "source": [
    "## API Reference\n",
    "\n",
-    "For detailed documentation on `ZhipuAIEmbeddings` features and configuration options, please refer to the [API reference](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html).\n"
+    "For detailed documentation on `ZhipuAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html).\n"
   ]
  }
 ],
--- a/docs/docs/integrations/vectorstores/databricks_vector_search.ipynb
+++ b/docs/docs/integrations/vectorstores/databricks_vector_search.ipynb
@@ -506,7 +506,7 @@
   "source": [
    "## API reference\n",
    "\n",
-    "For detailed documentation of all DatabricksVectorSearch features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_databricks.vectorstores.DatabricksVectorSearch.html"
+    "For detailed documentation of all DatabricksVectorSearch features and configurations head to the API reference: https://python.langchain.com/api_reference/databricks/vectorstores/langchain_databricks.vectorstores.DatabricksVectorSearch.html"
   ]
  }
 ],
--- a/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb
+++ b/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb
@@ -439,7 +439,7 @@
   "source": [
    "#### Other search methods\n",
    "\n",
-    "There are a variety of other search methods that are not covered in this notebook, such as MMR search or searching by vector. For a full list of the search abilities available for `MongoDBAtlasVectorStore` check out the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_mongodb.vectorstores.MongoDBAtlasVectorSearch.html)."
+    "There are a variety of other search methods that are not covered in this notebook, such as MMR search or searching by vector. For a full list of the search abilities available for `MongoDBAtlasVectorStore` check out the [API reference](https://python.langchain.com/api_reference/mongodb/vectorstores/langchain_mongodb.vectorstores.MongoDBAtlasVectorSearch.html)."
   ]
  },
  {
--- a/docs/docs/integrations/vectorstores/sap_hanavector.ipynb
+++ b/docs/docs/integrations/vectorstores/sap_hanavector.ipynb
@@ -22,7 +22,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "metadata": {
    "tags": []
   },
@@ -41,7 +41,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-09-09T08:02:16.802456Z",
@@ -64,7 +64,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-09-09T08:02:28.174088Z",
@@ -73,8 +73,10 @@
   },
   "outputs": [],
   "source": [
+    "from dotenv import load_dotenv\n",
    "from hdbcli import dbapi\n",
    "\n",
+    "load_dotenv()\n",
    "# Use connection settings from the environment\n",
    "connection = dbapi.connect(\n",
    "    address=os.environ.get(\"HANA_DB_ADDRESS\"),\n",
@@ -102,14 +104,22 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-09-09T08:02:25.452472Z",
     "start_time": "2023-09-09T08:02:25.441563Z"
    }
   },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of document chunks: 88\n"
+     ]
+    }
+   ],
   "source": [
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_community.vectorstores.hanavector import HanaDB\n",
@@ -134,7 +144,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-09-09T08:04:16.696625Z",
@@ -157,9 +167,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "# Delete already existing documents from the table\n",
    "db.delete(filter={})\n",
@@ -178,9 +199,24 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
+      "\n",
+      "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
+      "--------------------------------------------------------------------------------\n",
+      "As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n",
+      "\n",
+      "While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.\n"
+     ]
+    }
+   ],
   "source": [
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "docs = db.similarity_search(query, k=2)\n",
@@ -199,9 +235,24 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
+      "\n",
+      "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
+      "--------------------------------------------------------------------------------\n",
+      "As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n",
+      "\n",
+      "While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.\n"
+     ]
+    }
+   ],
   "source": [
    "from langchain_community.vectorstores.utils import DistanceStrategy\n",
    "\n",
@@ -235,7 +286,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-09-09T08:05:23.276819Z",
@@ -246,7 +297,24 @@
     "outputs_hidden": false
    }
   },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
+      "\n",
+      "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
+      "--------------------------------------------------------------------------------\n",
+      "Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n",
+      "\n",
+      "In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \n",
+      "\n",
+      "Let each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world.\n"
+     ]
+    }
+   ],
   "source": [
    "docs = db.max_marginal_relevance_search(query, k=2, fetch_k=20)\n",
    "for doc in docs:\n",
@@ -254,6 +322,86 @@
    "    print(doc.page_content)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Creating an HNSW Vector Index\n",
+    "\n",
+    "A vector index can significantly speed up top-k nearest neighbor queries for vectors. Users can create a Hierarchical Navigable Small World (HNSW) vector index using the `create_hnsw_index` function.\n",
+    "\n",
+    "For more information about creating an index at the database level, please refer to the [official documentation](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-vector-engine-guide/create-vector-index-statement-data-definition).\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
+      "\n",
+      "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
+      "--------------------------------------------------------------------------------\n",
+      "Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n",
+      "\n",
+      "In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \n",
+      "\n",
+      "Let each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# HanaDB instance uses cosine similarity as default:\n",
+    "db_cosine = HanaDB(\n",
+    "    embedding=embeddings, connection=connection, table_name=\"STATE_OF_THE_UNION\"\n",
+    ")\n",
+    "\n",
+    "# Attempting to create the HNSW index with default parameters\n",
+    "db_cosine.create_hnsw_index()  # If no other parameters are specified, the default values will be used\n",
+    "# Default values: m=64, ef_construction=128, ef_search=200\n",
+    "# The default index name is expected to be STATE_OF_THE_UNION_COSINE_SIMILARITY_IDX (see the HanaDB class for the exact naming pattern)\n",
+    "\n",
+    "\n",
+    "# Creating a HanaDB instance with L2 distance as the similarity function and defined values\n",
+    "db_l2 = HanaDB(\n",
+    "    embedding=embeddings,\n",
+    "    connection=connection,\n",
+    "    table_name=\"STATE_OF_THE_UNION\",\n",
+    "    distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,  # Specify L2 distance\n",
+    ")\n",
+    "\n",
+    "# This will create an index based on L2 distance strategy.\n",
+    "db_l2.create_hnsw_index(\n",
+    "    index_name=\"STATE_OF_THE_UNION_L2_index\",\n",
+    "    m=100,  # Max number of neighbors per graph node (valid range: 4 to 1000)\n",
+    "    ef_construction=200,  # Max number of candidates during graph construction (valid range: 1 to 100000)\n",
+    "    ef_search=500,  # Min number of candidates during the search (valid range: 1 to 100000)\n",
+    ")\n",
+    "\n",
+    "# Use L2 index to perform MMR\n",
+    "docs = db_l2.max_marginal_relevance_search(query, k=2, fetch_k=20)\n",
+    "for doc in docs:\n",
+    "    print(\"-\" * 80)\n",
+    "    print(doc.page_content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "**Key Points**:\n",
+    "- **Similarity Function**: The similarity function for the index is **cosine similarity** by default. If you want to use a different similarity function (e.g., `L2` distance), you need to specify it when initializing the `HanaDB` instance.\n",
+    "- **Default Parameters**: In the `create_hnsw_index` function, if the user does not provide custom values for parameters like `m`, `ef_construction`, or `ef_search`, the default values (e.g., `m=64`, `ef_construction=128`, `ef_search=200`) will be used automatically. These values ensure the index is created with reasonable performance without requiring user intervention.\n",
+    "\n"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -263,9 +411,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "db = HanaDB(\n",
    "    connection=connection, embedding=embeddings, table_name=\"LANGCHAIN_DEMO_BASIC\"\n",
@@ -284,9 +443,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "docs = [Document(page_content=\"Some text\"), Document(page_content=\"Other docs\")]\n",
    "db.add_documents(docs)"
@@ -301,9 +471,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "docs = [\n",
    "    Document(\n",
@@ -327,9 +508,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "foo\n",
+      "{'start': 100, 'end': 150, 'doc_name': 'foo.txt', 'quality': 'bad'}\n"
+     ]
+    }
+   ],
   "source": [
    "docs = db.similarity_search(\"foobar\", k=2, filter={\"quality\": \"bad\"})\n",
    "# With filtering on \"quality\"==\"bad\", only one document should be returned\n",
@@ -348,9 +539,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0\n"
+     ]
+    }
+   ],
   "source": [
    "db.delete(filter={\"quality\": \"bad\"})\n",
    "\n",
@@ -385,7 +584,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -433,9 +632,30 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Filter: {'id': {'$ne': 1}}\n",
+      "{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
+      "{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n",
+      "Filter: {'id': {'$gt': 1}}\n",
+      "{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
+      "{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n",
+      "Filter: {'id': {'$gte': 1}}\n",
+      "{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
+      "{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
+      "{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n",
+      "Filter: {'id': {'$lt': 1}}\n",
+      "<empty result>\n",
+      "Filter: {'id': {'$lte': 1}}\n",
+      "{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n"
+     ]
+    }
+   ],
   "source": [
    "advanced_filter = {\"id\": {\"$ne\": 1}}\n",
    "print(f\"Filter: {advanced_filter}\")\n",
@@ -467,9 +687,24 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Filter: {'id': {'$between': (1, 2)}}\n",
+      "{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
+      "{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
+      "Filter: {'name': {'$in': ['adam', 'bob']}}\n",
+      "{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
+      "{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
+      "Filter: {'name': {'$nin': ['adam', 'bob']}}\n",
+      "{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n"
+     ]
+    }
+   ],
   "source": [
    "advanced_filter = {\"id\": {\"$between\": (1, 2)}}\n",
    "print(f\"Filter: {advanced_filter}\")\n",
@@ -493,9 +728,21 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 27,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Filter: {'name': {'$like': 'a%'}}\n",
+      "{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
+      "Filter: {'name': {'$like': '%a%'}}\n",
+      "{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
+      "{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n"
+     ]
+    }
+   ],
   "source": [
    "advanced_filter = {\"name\": {\"$like\": \"a%\"}}\n",
    "print(f\"Filter: {advanced_filter}\")\n",
@@ -515,9 +762,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 28,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Filter: {'$or': [{'id': 1}, {'name': 'bob'}]}\n",
+      "{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
+      "{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
+      "Filter: {'$and': [{'id': 1}, {'id': 2}]}\n",
+      "<empty result>\n",
+      "Filter: {'$or': [{'id': 1}, {'id': 2}, {'id': 3}]}\n",
+      "{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
+      "{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
+      "{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n"
+     ]
+    }
+   ],
   "source": [
    "advanced_filter = {\"$or\": [{\"id\": 1}, {\"name\": \"bob\"}]}\n",
    "print(f\"Filter: {advanced_filter}\")\n",
@@ -541,7 +804,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -574,7 +837,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -635,9 +898,21 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 32,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Answer from LLM:\n",
+      "================\n",
+      "The United States has set up joint patrols with Mexico and Guatemala to catch more human traffickers. This collaboration is part of the efforts to address immigration issues and secure the borders in the region.\n",
+      "================\n",
+      "Number of used source document chunks: 5\n"
+     ]
+    }
+   ],
   "source": [
    "question = \"What about Mexico and Guatemala?\"\n",
    "\n",
@@ -679,9 +954,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Answer from LLM:\n",
+      "================\n",
+      "Mexico and Guatemala are involved in joint patrols to catch human traffickers.\n"
+     ]
+    }
+   ],
   "source": [
    "question = \"What about other countries?\"\n",
    "\n",
@@ -711,9 +996,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "# Access the vector DB with a new table\n",
    "db = HanaDB(\n",
@@ -742,9 +1038,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "('VEC_META', 'NCLOB')\n",
+      "('VEC_TEXT', 'NCLOB')\n",
+      "('VEC_VECTOR', 'REAL_VECTOR')\n"
+     ]
+    }
+   ],
   "source": [
    "cur = connection.cursor()\n",
    "cur.execute(\n",
@@ -795,12 +1101,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 39,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "None\n",
+      "Some other text\n",
+      "{\"start\": 400, \"end\": 450, \"doc_name\": \"other.txt\"}\n",
+      "<memory at 0x7f5edcb18d00>\n"
+     ]
+    }
+   ],
   "source": [
-    "# Create a new table \"MY_OWN_TABLE\" with three \"standard\" columns and one additional column\n",
-    "my_own_table_name = \"MY_OWN_TABLE\"\n",
+    "# Create a new table \"MY_OWN_TABLE_ADD\" with three \"standard\" columns and one additional column\n",
+    "my_own_table_name = \"MY_OWN_TABLE_ADD\"\n",
    "cur = connection.cursor()\n",
    "cur.execute(\n",
    "    (\n",
@@ -851,9 +1168,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "Some other text\n",
+      "--------------------------------------------------------------------------------\n",
+      "Some more text\n"
+     ]
+    }
+   ],
   "source": [
    "docs = [\n",
    "    Document(\n",
@@ -886,9 +1214,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Filters on this value are very performant\n",
+      "Some other text\n",
+      "{\"start\": 400, \"end\": 450, \"doc_name\": \"other.txt\", \"CUSTOMTEXT\": \"Filters on this value are very performant\"}\n",
+      "<memory at 0x7f5edcb193c0>\n"
+     ]
+    }
+   ],
   "source": [
    "# Create a new table \"PERFORMANT_CUSTOMTEXT_FILTER\" with three \"standard\" columns and one additional column\n",
    "my_own_table_name = \"PERFORMANT_CUSTOMTEXT_FILTER\"\n",
@@ -952,9 +1291,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 42,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "Some other text\n",
+      "--------------------------------------------------------------------------------\n",
+      "Some more text\n"
+     ]
+    }
+   ],
   "source": [
    "docs = [\n",
    "    Document(\n",
@@ -994,7 +1344,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.10.14"
  }
 },
 "nbformat": 4,
--- a/docs/docs/integrations/vectorstores/sqlitevec.ipynb
+++ b/docs/docs/integrations/vectorstores/sqlitevec.ipynb
@@ -155,7 +155,7 @@
   "cell_type": "markdown",
   "source": [
    "## API reference\n",
-    "For detailed documentation of all SQLiteVec features and configurations head to the API reference:https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.sqlitevec.SQLiteVec.html"
+    "For detailed documentation of all SQLiteVec features and configurations head to the API reference: https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.sqlitevec.SQLiteVec.html"
   ]
  },
  {
--- a/docs/docs/security.md
+++ b/docs/docs/security.md
@@ -1,30 +0,0 @@
-# Security
-
-LangChain has a large ecosystem of integrations with various external resources like local and remote file systems, APIs and databases. These integrations allow developers to create versatile applications that combine the power of LLMs with the ability to access, interact with and manipulate external resources.
-
-## Best practices
-
-When building such applications developers should remember to follow good security practices:
-
-* [**Limit Permissions**](https://en.wikipedia.org/wiki/Principle_of_least_privilege): Scope permissions specifically to the application's need. Granting broad or excessive permissions can introduce significant security vulnerabilities. To avoid such vulnerabilities, consider using read-only credentials, disallowing access to sensitive resources, using sandboxing techniques (such as running inside a container), specifying proxy configurations to control external requests, etc. as appropriate for your application.
-* **Anticipate Potential Misuse**: Just as humans can err, so can Large Language Models (LLMs). Always assume that any system access or credentials may be used in any way allowed by the permissions they are assigned. For example, if a pair of database credentials allows deleting data, it’s safest to assume that any LLM able to use those credentials may in fact delete data.
-* [**Defense in Depth**](https://en.wikipedia.org/wiki/Defense_in_depth_(computing)): No security technique is perfect. Fine-tuning and good chain design can reduce, but not eliminate, the odds that a Large Language Model (LLM) may make a mistake. It’s best to combine multiple layered security approaches rather than relying on any single layer of defense to ensure security. For example: use both read-only permissions and sandboxing to ensure that LLMs are only able to access data that is explicitly meant for them to use.
-
-Risks of not doing so include, but are not limited to:
-* Data corruption or loss.
-* Unauthorized access to confidential information.
-* Compromised performance or availability of critical resources.
-
-Example scenarios with mitigation strategies:
-
-* A user may ask an agent with access to the file system to delete files that should not be deleted or read the content of files that contain sensitive information. To mitigate, limit the agent to only use a specific directory and only allow it to read or write files that are safe to read or write. Consider further sandboxing the agent by running it in a container.
-* A user may ask an agent with write access to an external API to write malicious data to the API, or delete data from that API. To mitigate, give the agent read-only API keys, or limit it to only use endpoints that are already resistant to such misuse.
-* A user may ask an agent with access to a database to drop a table or mutate the schema. To mitigate, scope the credentials to only the tables that the agent needs to access and consider issuing READ-ONLY credentials.
-
-If you're building applications that access external resources like file systems, APIs
-or databases, consider speaking with your company's security team to determine how to best
-design and secure your applications.
-
-## Reporting a vulnerability
-
-Please report security vulnerabilities by email to security@langchain.dev. This will ensure the issue is promptly triaged and acted upon as needed.
--- a/docs/docs/tutorials/retrievers.ipynb
+++ b/docs/docs/tutorials/retrievers.ipynb
@@ -245,7 +245,7 @@
    "\n",
    "import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
    "\n",
-    "<EmbeddingTabs customVarName=\"embeddings_model\" />"
+    "<EmbeddingTabs customVarName=\"embeddings\" />"
   ]
  },
  {
--- a/docs/docs/versions/migrating_memory/conversation_buffer_window_memory.ipynb
+++ b/docs/docs/versions/migrating_memory/conversation_buffer_window_memory.ipynb
@@ -426,7 +426,7 @@
    "\n",
    "## Usage with a pre-built langgraph agent\n",
    "\n",
-    "This example shows usage of an Agent Executor with a pre-built agent constructed using the [create_tool_calling_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.tool_calling_agent.base.create_tool_calling_agent.html) function.\n",
+    "This example shows usage of an Agent Executor with a pre-built agent constructed using the [create_tool_calling_agent](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.tool_calling_agent.base.create_tool_calling_agent.html) function.\n",
    "\n",
    "If you are using one of the [old LangChain pre-built agents](https://python.langchain.com/v0.1/docs/modules/agents/agent_types/), you should be able\n",
    "to replace that code with the new [langgraph pre-built agent](https://langchain-ai.github.io/langgraph/how-tos/create-react-agent/) which leverages\n",
@@ -673,7 +673,7 @@
    "</details>\n",
    "\n",
    "If you need to implement more efficient logic and want to use `RunnableWithMessageHistory` for now the way to achieve this\n",
-    "is to subclass from [BaseChatMessageHistory](https://api.python.langchain.com/en/latest/chat_history/langchain_core.chat_history.BaseChatMessageHistory.html) and\n",
+    "is to subclass from [BaseChatMessageHistory](https://python.langchain.com/api_reference/core/chat_history/langchain_core.chat_history.BaseChatMessageHistory.html) and\n",
    "define appropriate logic for `add_messages` (that doesn't simply append the history, but instead re-writes it).\n",
    "\n",
    "Unless you have a good reason to implement this solution, you should instead use LangGraph."
--- a/docs/src/theme/ChatModelTabs.js
+++ b/docs/src/theme/ChatModelTabs.js
@@ -15,6 +15,7 @@ import CodeBlock from "@theme-original/CodeBlock";
 * @property {string} [googleParams] - Parameters for Google chat model. Defaults to `model="gemini-pro"`
 * @property {string} [togetherParams] - Parameters for Together chat model. Defaults to `model="mistralai/Mixtral-8x7B-Instruct-v0.1"`
 * @property {string} [nvidiaParams] - Parameters for Nvidia NIM model. Defaults to `model="meta/llama3-70b-instruct"`
+  * @property {string} [databricksParams] - Parameters for Databricks model. Defaults to `endpoint="databricks-meta-llama-3-1-70b-instruct"`
 * @property {string} [awsBedrockParams] - Parameters for AWS Bedrock chat model.
 * @property {boolean} [hideOpenai] - Whether or not to hide OpenAI chat model.
 * @property {boolean} [hideAnthropic] - Whether or not to hide Anthropic chat model.
@@ -27,6 +28,7 @@ import CodeBlock from "@theme-original/CodeBlock";
 * @property {boolean} [hideAzure] - Whether or not to hide Microsoft Azure OpenAI chat model.
 * @property {boolean} [hideNvidia] - Whether or not to hide NVIDIA NIM model.
 * @property {boolean} [hideAWS] - Whether or not to hide AWS models.
+ * @property {boolean} [hideDatabricks] - Whether or not to hide Databricks models.
 * @property {string} [customVarName] - Custom variable name for the model. Defaults to `model`.
 */

@@ -46,6 +48,7 @@ export default function ChatModelTabs(props) {
    azureParams,
    nvidiaParams,
    awsBedrockParams,
+    databricksParams,
    hideOpenai,
    hideAnthropic,
    hideCohere,
@@ -57,6 +60,7 @@ export default function ChatModelTabs(props) {
    hideAzure,
    hideNvidia,
    hideAWS,
+    hideDatabricks,
    customVarName,
  } = props;

@@ -79,6 +83,7 @@ export default function ChatModelTabs(props) {
    `\n    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],\n    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],\n    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],\n`;
  const nvidiaParamsOrDefault = nvidiaParams ?? `model="meta/llama3-70b-instruct"`
  const awsBedrockParamsOrDefault = awsBedrockParams ?? `model="anthropic.claude-3-5-sonnet-20240620-v1:0",\n    beta_use_converse_api=True`;
+  const databricksParamsOrDefault = databricksParams ?? `endpoint="databricks-meta-llama-3-1-70b-instruct"`

  const llmVarName = customVarName ?? "model";

@@ -182,6 +187,15 @@ export default function ChatModelTabs(props) {
      default: false,
      shouldHide: hideTogether,
    },
+    {
+      value: "Databricks",
+      label: "Databricks",
+      text: `from databricks_langchain import ChatDatabricks\n\nos.environ["DATABRICKS_HOST"] = "https://example.staging.cloud.databricks.com/serving-endpoints"\n\n${llmVarName} = ChatDatabricks(${databricksParamsOrDefault})`,
+      apiKeyName: "DATABRICKS_TOKEN",
+      packageName: "databricks-langchain",
+      default: false,
+      shouldHide: hideDatabricks,
+    },
  ];

  return (
--- a/libs/cli/.gitignore
+++ b/libs/cli/.gitignore
@@ -158,3 +158,5 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+.integration_test
--- a/libs/cli/Makefile
+++ b/libs/cli/Makefile
@@ -1,8 +1,49 @@
-lint lint_diff:
-	poetry run poe lint

-test:
-	poetry run poe test
+######################
+# LINTING AND FORMATTING
+######################

-format:
-	poetry run poe format
+# Define a variable for Python and notebook files.
+PYTHON_FILES=.
+MYPY_CACHE=.mypy_cache
+lint format: PYTHON_FILES=.
+lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/cli --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
+lint_package: PYTHON_FILES=langchain_cli
+lint_tests: PYTHON_FILES=tests
+lint_tests: MYPY_CACHE=.mypy_cache_test
+
+lint lint_diff lint_package lint_tests:
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
+	[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
+
+format format_diff:
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)
+
+test tests: _test _e2e_test
+
+PYTHON = .venv/bin/python
+
+_test:
+	poetry run pytest tests
+
+# custom integration testing for cli integration flow
+# currently ignores the vectorstores test because it lacks an implementation
+_e2e_test:
+	rm -rf .integration_test
+	mkdir .integration_test
+	cd .integration_test && \
+		python3 -m venv .venv && \
+		$(PYTHON) -m pip install --upgrade poetry && \
+		$(PYTHON) -m pip install -e .. && \
+		$(PYTHON) -m langchain_cli.cli integration new --name parrot-link --name-class ParrotLink && \
+		$(PYTHON) -m langchain_cli.cli integration new --name parrot-link --name-class ParrotLinkB --src=integration_template/chat_models.py --dst=langchain-parrot-link/langchain_parrot_link/chat_models_b.py && \
+		$(PYTHON) -m langchain_cli.cli integration create-doc --name parrot-link --name-class ParrotLinkB --component-type ChatModel --destination-dir langchain-parrot-link/docs && \
+		cd langchain-parrot-link && \
+			poetry install --with lint,typing,test && \
+			poetry run pip install -e ../../../standard-tests && \
+			make format lint tests && \
+			poetry install --with test_integration && \
+			rm tests/integration_tests/test_vectorstores.py && \
+			make integration_test 
--- a/libs/cli/langchain_cli/dev_scripts.py
+++ b/libs/cli/langchain_cli/dev_scripts.py
@@ -1,3 +1,4 @@
+# type: ignore
 """
 Development Scripts for template packages
 """
--- a/libs/cli/langchain_cli/integration_template/Makefile
+++ b/libs/cli/langchain_cli/integration_template/Makefile
@@ -33,13 +33,13 @@ lint_tests: PYTHON_FILES=tests
 lint_tests: MYPY_CACHE=.mypy_cache_test

 lint lint_diff lint_package lint_tests:
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
 	[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

 format format_diff:
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)

 spell_check:
 	poetry run codespell --toml pyproject.toml
--- a/libs/cli/langchain_cli/integration_template/integration_template/init.py
+++ b/libs/cli/langchain_cli/integration_template/integration_template/init.py
@@ -1,8 +1,11 @@
 from importlib import metadata

 from __module_name__.chat_models import Chat__ModuleName__
+from __module_name__.document_loaders import __ModuleName__Loader
 from __module_name__.embeddings import __ModuleName__Embeddings
-from __module_name__.llms import __ModuleName__LLM
+from __module_name__.retrievers import __ModuleName__Retriever
+from __module_name__.toolkits import __ModuleName__Toolkit
+from __module_name__.tools import __ModuleName__Tool
 from __module_name__.vectorstores import __ModuleName__VectorStore

 try:
@@ -14,8 +17,11 @@ del metadata  # optional, avoids polluting the results of dir(__package__)

 __all__ = [
    "Chat__ModuleName__",
-    "__ModuleName__LLM",
    "__ModuleName__VectorStore",
    "__ModuleName__Embeddings",
+    "__ModuleName__Loader",
+    "__ModuleName__Retriever",
+    "__ModuleName__Toolkit",
+    "__ModuleName__Tool",
    "__version__",
 ]
--- a/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py
+++ b/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py
@@ -1,13 +1,19 @@
 """__ModuleName__ chat models."""

-from typing import Any, List, Optional
+from typing import Any, Dict, Iterator, List, Optional

 from langchain_core.callbacks import (
    CallbackManagerForLLMRun,
 )
-from langchain_core.language_models.chat_models import BaseChatModel
-from langchain_core.messages import BaseMessage
-from langchain_core.outputs import ChatResult
+from langchain_core.language_models import BaseChatModel
+from langchain_core.messages import (
+    AIMessage,
+    AIMessageChunk,
+    BaseMessage,
+)
+from langchain_core.messages.ai import UsageMetadata
+from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
+from pydantic import Field


 class Chat__ModuleName__(BaseChatModel):
@@ -15,6 +21,8 @@ class Chat__ModuleName__(BaseChatModel):
    # https://github.com/langchain-ai/langchain/blob/7ff05357bac6eaedf5058a2af88f23a1817d40fe/libs/partners/openai/langchain_openai/chat_models/base.py#L1120
    """__ModuleName__ chat model integration.

+    The default implementation echoes the first `parrot_buffer_length` characters of the input.
+
    # TODO: Replace with relevant packages, env vars.
    Setup:
        Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.
@@ -258,7 +266,36 @@ class Chat__ModuleName__(BaseChatModel):

    """  # noqa: E501

-    # TODO: This method must be implemented to generate chat responses.
+    model_name: str = Field(alias="model")
+    """The name of the model"""
+    parrot_buffer_length: int
+    """The number of characters from the last message of the prompt to be echoed."""
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+    timeout: Optional[int] = None
+    stop: Optional[List[str]] = None
+    max_retries: int = 2
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of chat model."""
+        return "chat-__package_name_short__"
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Return a dictionary of identifying parameters.
+
+        This information is used by the LangChain callback system, which
+        is used for tracing purposes and makes it possible to monitor LLMs.
+        """
+        return {
+            # The model name allows users to specify custom token counting
+            # rules in LLM monitoring applications (e.g., in LangSmith users
+            # can provide per token pricing for their model and monitor
+            # costs for the given LLM.)
+            "model_name": self.model_name,
+        }
+
    def _generate(
        self,
        messages: List[BaseMessage],
@@ -266,16 +303,101 @@ class Chat__ModuleName__(BaseChatModel):
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
-        raise NotImplementedError()
+        """Override the _generate method to implement the chat model logic.

-    # TODO: Implement if Chat__ModuleName__ supports streaming. Otherwise delete method.
-    # def _stream(
-    #     self,
-    #     messages: List[BaseMessage],
-    #     stop: Optional[List[str]] = None,
-    #     run_manager: Optional[CallbackManagerForLLMRun] = None,
-    #     **kwargs: Any,
-    # ) -> Iterator[ChatGenerationChunk]:
+        This can be a call to an API, a call to a local model, or any other
+        implementation that generates a response to the input prompt.
+
+        Args:
+            messages: the prompt composed of a list of messages.
+            stop: a list of strings on which the model should stop generating.
+                  If generation stops due to a stop token, the stop token itself
+                  SHOULD BE INCLUDED as part of the output. This is not enforced
+                  across models right now, but it's a good practice to follow since
+                  it makes it much easier to parse the output of the model
+                  downstream and understand why generation stopped.
+            run_manager: A run manager with callbacks for the LLM.
+        """
+        # Replace this with actual logic to generate a response from a list
+        # of messages.
+        last_message = messages[-1]
+        tokens = last_message.content[: self.parrot_buffer_length]
+        ct_input_tokens = sum(len(message.content) for message in messages)
+        ct_output_tokens = len(tokens)
+        message = AIMessage(
+            content=tokens,
+            additional_kwargs={},  # Used to add additional payload to the message
+            response_metadata={  # Use for response metadata
+                "time_in_seconds": 3,
+            },
+            usage_metadata={
+                "input_tokens": ct_input_tokens,
+                "output_tokens": ct_output_tokens,
+                "total_tokens": ct_input_tokens + ct_output_tokens,
+            },
+        )
+        ##
+
+        generation = ChatGeneration(message=message)
+        return ChatResult(generations=[generation])
+
+    def _stream(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> Iterator[ChatGenerationChunk]:
+        """Stream the output of the model.
+
+        This method should be implemented if the model can generate output
+        in a streaming fashion. If the model does not support streaming,
+        do not implement it. In that case streaming requests will be automatically
+        handled by the _generate method.
+
+        Args:
+            messages: the prompt composed of a list of messages.
+            stop: a list of strings on which the model should stop generating.
+                  If generation stops due to a stop token, the stop token itself
+                  SHOULD BE INCLUDED as part of the output. This is not enforced
+                  across models right now, but it's a good practice to follow since
+                  it makes it much easier to parse the output of the model
+                  downstream and understand why generation stopped.
+            run_manager: A run manager with callbacks for the LLM.
+        """
+        last_message = messages[-1]
+        tokens = str(last_message.content[: self.parrot_buffer_length])
+        ct_input_tokens = sum(len(message.content) for message in messages)
+
+        for token in tokens:
+            usage_metadata = UsageMetadata(
+                {
+                    "input_tokens": ct_input_tokens,
+                    "output_tokens": 1,
+                    "total_tokens": ct_input_tokens + 1,
+                }
+            )
+            ct_input_tokens = 0
+            chunk = ChatGenerationChunk(
+                message=AIMessageChunk(content=token, usage_metadata=usage_metadata)
+            )
+
+            if run_manager:
+                # This is optional in newer versions of LangChain
+                # The on_llm_new_token will be called automatically
+                run_manager.on_llm_new_token(token, chunk=chunk)
+
+            yield chunk
+
+        # Let's add some other information (e.g., response metadata)
+        chunk = ChatGenerationChunk(
+            message=AIMessageChunk(content="", response_metadata={"time_in_sec": 3})
+        )
+        if run_manager:
+            # This is optional in newer versions of LangChain
+            # The on_llm_new_token will be called automatically
+            run_manager.on_llm_new_token(token, chunk=chunk)
+        yield chunk

    # TODO: Implement if Chat__ModuleName__ supports async streaming. Otherwise delete.
    # async def _astream(
@@ -294,8 +416,3 @@ class Chat__ModuleName__(BaseChatModel):
    #     run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
    #     **kwargs: Any,
    # ) -> ChatResult:
-
-    @property
-    def _llm_type(self) -> str:
-        """Return type of chat model."""
-        return "chat-__package_name_short__"
--- a/libs/cli/langchain_cli/integration_template/integration_template/embeddings.py
+++ b/libs/cli/langchain_cli/integration_template/integration_template/embeddings.py
@@ -8,7 +8,8 @@ class __ModuleName__Embeddings(Embeddings):

    # TODO: Replace with relevant packages, env vars.
    Setup:
-        Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.
+        Install ``__package_name__`` and set environment variable
+        ``__MODULE_NAME___API_KEY``.

        .. code-block:: bash

@@ -70,21 +71,26 @@ class __ModuleName__Embeddings(Embeddings):

    """

+    def __init__(self, model: str):
+        self.model = model
+
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search docs."""
-        raise NotImplementedError
+        return [[0.5, 0.6, 0.7] for _ in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed query text."""
-        raise NotImplementedError
+        return self.embed_documents([text])[0]

-    # only keep aembed_documents and aembed_query if they're implemented!
-    # delete them otherwise to use the base class' default
-    # implementation, which calls the sync version in an executor
-    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
-        """Asynchronous Embed search docs."""
-        raise NotImplementedError
+    # optional: add custom async implementations here
+    # you can also delete these, and the base class will
+    # use the default implementation, which calls the sync
+    # version in an async executor:

-    async def aembed_query(self, text: str) -> List[float]:
-        """Asynchronous Embed query text."""
-        raise NotImplementedError
+    # async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
+    #     """Asynchronous Embed search docs."""
+    #     ...
+
+    # async def aembed_query(self, text: str) -> List[float]:
+    #     """Asynchronous Embed query text."""
+    #     ...
--- a/libs/cli/langchain_cli/integration_template/integration_template/llms.py
+++ b/libs/cli/langchain_cli/integration_template/integration_template/llms.py
@@ -1,155 +0,0 @@
-"""__ModuleName__ large language models."""
-
-from typing import (
-    Any,
-    List,
-    Optional,
-)
-
-from langchain_core.callbacks import (
-    CallbackManagerForLLMRun,
-)
-from langchain_core.language_models import BaseLLM
-from langchain_core.outputs import LLMResult
-
-
-class __ModuleName__LLM(BaseLLM):
-    """__ModuleName__ completion model integration.
-
-    # TODO: Replace with relevant packages, env vars.
-    Setup:
-        Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.
-
-        .. code-block:: bash
-
-            pip install -U __package_name__
-            export __MODULE_NAME___API_KEY="your-api-key"
-
-    # TODO: Populate with relevant params.
-    Key init args — completion params:
-        model: str
-            Name of __ModuleName__ model to use.
-        temperature: float
-            Sampling temperature.
-        max_tokens: Optional[int]
-            Max number of tokens to generate.
-
-    # TODO: Populate with relevant params.
-    Key init args — client params:
-        timeout: Optional[float]
-            Timeout for requests.
-        max_retries: int
-            Max number of retries.
-        api_key: Optional[str]
-            __ModuleName__ API key. If not passed in will be read from env var __MODULE_NAME___API_KEY.
-
-    See full list of supported init args and their descriptions in the params section.
-
-    # TODO: Replace with relevant init params.
-    Instantiate:
-        .. code-block:: python
-
-            from __module_name__ import __ModuleName__LLM
-
-            llm = __ModuleName__LLM(
-                model="...",
-                temperature=0,
-                max_tokens=None,
-                timeout=None,
-                max_retries=2,
-                # api_key="...",
-                # other params...
-            )
-
-    Invoke:
-        .. code-block:: python
-
-            input_text = "The meaning of life is "
-            llm.invoke(input_text)
-
-        .. code-block:: python
-
-            # TODO: Example output.
-
-    # TODO: Delete if token-level streaming isn't supported.
-    Stream:
-        .. code-block:: python
-
-            for chunk in llm.stream(input_text):
-                print(chunk)
-
-        .. code-block:: python
-
-            # TODO: Example output.
-
-        .. code-block:: python
-
-            ''.join(llm.stream(input_text))
-
-        .. code-block:: python
-
-            # TODO: Example output.
-
-    # TODO: Delete if native async isn't supported.
-    Async:
-        .. code-block:: python
-
-            await llm.ainvoke(input_text)
-
-            # stream:
-            # async for chunk in (await llm.astream(input_text))
-
-            # batch:
-            # await llm.abatch([input_text])
-
-        .. code-block:: python
-
-            # TODO: Example output.
-    """
-
-    # TODO: This method must be implemented to generate text completions.
-    def _generate(
-        self,
-        prompts: List[str],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> LLMResult:
-        raise NotImplementedError
-
-    # TODO: Implement if __ModuleName__LLM supports async generation. Otherwise
-    # delete method.
-    # async def _agenerate(
-    #     self,
-    #     prompts: List[str],
-    #     stop: Optional[List[str]] = None,
-    #     run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-    #     **kwargs: Any,
-    # ) -> LLMResult:
-    #     raise NotImplementedError
-
-    # TODO: Implement if __ModuleName__LLM supports streaming. Otherwise delete method.
-    # def _stream(
-    #     self,
-    #     prompt: str,
-    #     stop: Optional[List[str]] = None,
-    #     run_manager: Optional[CallbackManagerForLLMRun] = None,
-    #     **kwargs: Any,
-    # ) -> Iterator[GenerationChunk]:
-    #     raise NotImplementedError
-
-    # TODO: Implement if __ModuleName__LLM supports async streaming. Otherwise delete
-    # method.
-    # async def _astream(
-    #     self,
-    #     prompt: str,
-    #     stop: Optional[List[str]] = None,
-    #     run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-    #     **kwargs: Any,
-    # ) -> AsyncIterator[GenerationChunk]:
-    #     raise NotImplementedError
-
-    @property
-    def _llm_type(self) -> str:
-        """Return type of LLM."""
-        return "__package_name_short__-llm"
--- a/libs/cli/langchain_cli/integration_template/integration_template/retrievers.py
+++ b/libs/cli/langchain_cli/integration_template/integration_template/retrievers.py
@@ -1,7 +1,8 @@
 """__ModuleName__ retrievers."""

-from typing import List
+from typing import Any, List

+from langchain_core.callbacks import CallbackManagerForRetrieverRun
 from langchain_core.documents import Document
 from langchain_core.retrievers import BaseRetriever

@@ -13,7 +14,8 @@ class __ModuleName__Retriever(BaseRetriever):

    # TODO: Replace with relevant packages, env vars, etc.
    Setup:
-        Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.
+        Install ``__package_name__`` and set environment variable
+        ``__MODULE_NAME___API_KEY``.

        .. code-block:: bash

@@ -82,8 +84,24 @@ class __ModuleName__Retriever(BaseRetriever):

             # TODO: Example output.

-    """  # noqa: E501
+    """
+
+    k: int = 3

    # TODO: This method must be implemented to retrieve documents.
-    def _get_relevant_documents(self, query: str) -> List[Document]:
-        raise NotImplementedError()
+    def _get_relevant_documents(
+        self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any
+    ) -> List[Document]:
+        k = kwargs.get("k", self.k)
+        return [
+            Document(page_content=f"Result {i} for query: {query}") for i in range(k)
+        ]
+
+    # optional: add custom async implementations here
+    # async def _aget_relevant_documents(
+    #     self,
+    #     query: str,
+    #     *,
+    #     run_manager: AsyncCallbackManagerForRetrieverRun,
+    #     **kwargs: Any,
+    # ) -> List[Document]: ...
--- a/libs/cli/langchain_cli/integration_template/integration_template/toolkits.py
+++ b/libs/cli/langchain_cli/integration_template/integration_template/toolkits.py
@@ -2,10 +2,10 @@

 from typing import List

-from langchain_core.tools import BaseTool, BaseToolKit
+from langchain_core.tools import BaseTool, BaseToolkit


-class __ModuleName__Toolkit(BaseToolKit):
+class __ModuleName__Toolkit(BaseToolkit):
    # TODO: Replace all TODOs in docstring. See example docstring:
    # https://github.com/langchain-ai/langchain/blob/c123cb2b304f52ab65db4714eeec46af69a861ec/libs/community/langchain_community/agent_toolkits/sql/toolkit.py#L19
    """__ModuleName__ toolkit.
--- a/libs/cli/langchain_cli/integration_template/integration_template/tools.py
+++ b/libs/cli/langchain_cli/integration_template/integration_template/tools.py
@@ -6,10 +6,10 @@ from langchain_core.callbacks import (
    CallbackManagerForToolRun,
 )
 from langchain_core.tools import BaseTool
-from pydantic import BaseModel
+from pydantic import BaseModel, Field


-class __ModuleName__Input(BaseModel):
+class __ModuleName__ToolInput(BaseModel):
    """Input schema for __ModuleName__ tool.

    This docstring is **not** part of what is sent to the model when performing tool
@@ -18,12 +18,11 @@ class __ModuleName__Input(BaseModel):
    """

    # TODO: Add input args and descriptions.
-    # a: int = Field(..., description="first number")
-    # b: int = Field(0, description="second number")
-    ...
+    a: int = Field(..., description="first number to add")
+    b: int = Field(..., description="second number to add")


-class __ModuleName__Tool(BaseTool):
+class __ModuleName__Tool(BaseTool):  # type: ignore[override]
    """__ModuleName__ tool.

    Setup:
@@ -69,24 +68,26 @@ class __ModuleName__Tool(BaseTool):
    """The name that is passed to the model when performing tool calling."""
    description: str = "TODO: Tool description."
    """The description that is passed to the model when performing tool calling."""
-    args_schema: Type[BaseModel] = __ModuleName__Input
+    args_schema: Type[BaseModel] = __ModuleName__ToolInput
    """The schema that is passed to the model when performing tool calling."""

    # TODO: Add any other init params for the tool.
    # param1: Optional[str]
    # """param1 determines foobar"""

-    # TODO: Replaced *args with real tool arguments.
+    # TODO: Replace (a, b) with real tool arguments.
    def _run(
-        self, *args, run_manager: Optional[CallbackManagerForToolRun] = None
+        self, a: int, b: int, *, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
-        raise NotImplementedError
+        return str(a + b + 80)

    # TODO: Implement if tool has native async functionality, otherwise delete.

    # async def _arun(
    #     self,
-    #     *args,
+    #     a: int,
+    #     b: int,
+    #     *,
    #     run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    # ) -> str:
    #     ...
--- a/libs/cli/langchain_cli/integration_template/integration_template/vectorstores.py
+++ b/libs/cli/langchain_cli/integration_template/integration_template/vectorstores.py
@@ -2,8 +2,6 @@

 from __future__ import annotations

-import asyncio
-from functools import partial
 from typing import (
    TYPE_CHECKING,
    Any,
@@ -160,6 +158,8 @@ class __ModuleName__VectorStore(VectorStore):

    """  # noqa: E501

+    _database: dict[str, tuple[Document, list[float]]] = {}
+
    def add_texts(
        self,
        texts: Iterable[str],
@@ -168,65 +168,70 @@ class __ModuleName__VectorStore(VectorStore):
    ) -> List[str]:
        raise NotImplementedError

-    async def aadd_texts(
-        self,
-        texts: Iterable[str],
-        metadatas: Optional[List[dict]] = None,
-        **kwargs: Any,
-    ) -> List[str]:
-        return await asyncio.get_running_loop().run_in_executor(
-            None, partial(self.add_texts, **kwargs), texts, metadatas
-        )
+    # optional: add custom async implementations
+    # async def aadd_texts(
+    #     self,
+    #     texts: Iterable[str],
+    #     metadatas: Optional[List[dict]] = None,
+    #     **kwargs: Any,
+    # ) -> List[str]:
+    #     return await asyncio.get_running_loop().run_in_executor(
+    #         None, partial(self.add_texts, **kwargs), texts, metadatas
+    #     )

    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
        raise NotImplementedError

-    async def adelete(
-        self, ids: Optional[List[str]] = None, **kwargs: Any
-    ) -> Optional[bool]:
-        raise NotImplementedError
+    # optional: add custom async implementations
+    # async def adelete(
+    #     self, ids: Optional[List[str]] = None, **kwargs: Any
+    # ) -> Optional[bool]:
+    #     raise NotImplementedError

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        raise NotImplementedError

-    async def asimilarity_search(
-        self, query: str, k: int = 4, **kwargs: Any
-    ) -> List[Document]:
-        # This is a temporary workaround to make the similarity search
-        # asynchronous. The proper solution is to make the similarity search
-        # asynchronous in the vector store implementations.
-        func = partial(self.similarity_search, query, k=k, **kwargs)
-        return await asyncio.get_event_loop().run_in_executor(None, func)
+    # optional: add custom async implementations
+    # async def asimilarity_search(
+    #     self, query: str, k: int = 4, **kwargs: Any
+    # ) -> List[Document]:
+    #     # This is a temporary workaround to make the similarity search
+    #     # asynchronous. The proper solution is to make the similarity search
+    #     # asynchronous in the vector store implementations.
+    #     func = partial(self.similarity_search, query, k=k, **kwargs)
+    #     return await asyncio.get_event_loop().run_in_executor(None, func)

    def similarity_search_with_score(
        self, *args: Any, **kwargs: Any
    ) -> List[Tuple[Document, float]]:
        raise NotImplementedError

-    async def asimilarity_search_with_score(
-        self, *args: Any, **kwargs: Any
-    ) -> List[Tuple[Document, float]]:
-        # This is a temporary workaround to make the similarity search
-        # asynchronous. The proper solution is to make the similarity search
-        # asynchronous in the vector store implementations.
-        func = partial(self.similarity_search_with_score, *args, **kwargs)
-        return await asyncio.get_event_loop().run_in_executor(None, func)
+    # optional: add custom async implementations
+    # async def asimilarity_search_with_score(
+    #     self, *args: Any, **kwargs: Any
+    # ) -> List[Tuple[Document, float]]:
+    #     # This is a temporary workaround to make the similarity search
+    #     # asynchronous. The proper solution is to make the similarity search
+    #     # asynchronous in the vector store implementations.
+    #     func = partial(self.similarity_search_with_score, *args, **kwargs)
+    #     return await asyncio.get_event_loop().run_in_executor(None, func)

    def similarity_search_by_vector(
        self, embedding: List[float], k: int = 4, **kwargs: Any
    ) -> List[Document]:
        raise NotImplementedError

-    async def asimilarity_search_by_vector(
-        self, embedding: List[float], k: int = 4, **kwargs: Any
-    ) -> List[Document]:
-        # This is a temporary workaround to make the similarity search
-        # asynchronous. The proper solution is to make the similarity search
-        # asynchronous in the vector store implementations.
-        func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
-        return await asyncio.get_event_loop().run_in_executor(None, func)
+    # optional: add custom async implementations
+    # async def asimilarity_search_by_vector(
+    #     self, embedding: List[float], k: int = 4, **kwargs: Any
+    # ) -> List[Document]:
+    #     # This is a temporary workaround to make the similarity search
+    #     # asynchronous. The proper solution is to make the similarity search
+    #     # asynchronous in the vector store implementations.
+    #     func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
+    #     return await asyncio.get_event_loop().run_in_executor(None, func)

    def max_marginal_relevance_search(
        self,
@@ -238,26 +243,27 @@ class __ModuleName__VectorStore(VectorStore):
    ) -> List[Document]:
        raise NotImplementedError

-    async def amax_marginal_relevance_search(
-        self,
-        query: str,
-        k: int = 4,
-        fetch_k: int = 20,
-        lambda_mult: float = 0.5,
-        **kwargs: Any,
-    ) -> List[Document]:
-        # This is a temporary workaround to make the similarity search
-        # asynchronous. The proper solution is to make the similarity search
-        # asynchronous in the vector store implementations.
-        func = partial(
-            self.max_marginal_relevance_search,
-            query,
-            k=k,
-            fetch_k=fetch_k,
-            lambda_mult=lambda_mult,
-            **kwargs,
-        )
-        return await asyncio.get_event_loop().run_in_executor(None, func)
+    # optional: add custom async implementations
+    # async def amax_marginal_relevance_search(
+    #     self,
+    #     query: str,
+    #     k: int = 4,
+    #     fetch_k: int = 20,
+    #     lambda_mult: float = 0.5,
+    #     **kwargs: Any,
+    # ) -> List[Document]:
+    #     # This is a temporary workaround to make the similarity search
+    #     # asynchronous. The proper solution is to make the similarity search
+    #     # asynchronous in the vector store implementations.
+    #     func = partial(
+    #         self.max_marginal_relevance_search,
+    #         query,
+    #         k=k,
+    #         fetch_k=fetch_k,
+    #         lambda_mult=lambda_mult,
+    #         **kwargs,
+    #     )
+    #     return await asyncio.get_event_loop().run_in_executor(None, func)

    def max_marginal_relevance_search_by_vector(
        self,
@@ -269,15 +275,16 @@ class __ModuleName__VectorStore(VectorStore):
    ) -> List[Document]:
        raise NotImplementedError

-    async def amax_marginal_relevance_search_by_vector(
-        self,
-        embedding: List[float],
-        k: int = 4,
-        fetch_k: int = 20,
-        lambda_mult: float = 0.5,
-        **kwargs: Any,
-    ) -> List[Document]:
-        raise NotImplementedError
+    # optional: add custom async implementations
+    # async def amax_marginal_relevance_search_by_vector(
+    #     self,
+    #     embedding: List[float],
+    #     k: int = 4,
+    #     fetch_k: int = 20,
+    #     lambda_mult: float = 0.5,
+    #     **kwargs: Any,
+    # ) -> List[Document]:
+    #     raise NotImplementedError

    @classmethod
    def from_texts(
@@ -289,17 +296,18 @@ class __ModuleName__VectorStore(VectorStore):
    ) -> VST:
        raise NotImplementedError

-    @classmethod
-    async def afrom_texts(
-        cls: Type[VST],
-        texts: List[str],
-        embedding: Embeddings,
-        metadatas: Optional[List[dict]] = None,
-        **kwargs: Any,
-    ) -> VST:
-        return await asyncio.get_running_loop().run_in_executor(
-            None, partial(cls.from_texts, **kwargs), texts, embedding, metadatas
-        )
+    # optional: add custom async implementations
+    # @classmethod
+    # async def afrom_texts(
+    #     cls: Type[VST],
+    #     texts: List[str],
+    #     embedding: Embeddings,
+    #     metadatas: Optional[List[dict]] = None,
+    #     **kwargs: Any,
+    # ) -> VST:
+    #     return await asyncio.get_running_loop().run_in_executor(
+    #         None, partial(cls.from_texts, **kwargs), texts, embedding, metadatas
+    #     )

    def _select_relevance_score_fn(self) -> Callable[[float], float]:
        raise NotImplementedError
--- a/libs/cli/langchain_cli/integration_template/pyproject.toml
+++ b/libs/cli/langchain_cli/integration_template/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = [ "poetry-core>=1.0.0",]
+requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"

 [tool.poetry]
@@ -23,14 +23,16 @@ python = ">=3.9,<4.0"
 langchain-core = "^0.3.15"

 [tool.ruff.lint]
-select = [ "E", "F", "I", "T201",]
+select = ["E", "F", "I", "T201"]

 [tool.coverage.run]
-omit = [ "tests/*",]
+omit = ["tests/*"]

 [tool.pytest.ini_options]
 addopts = "--strict-markers --strict-config --durations=5"
-markers = [ "compile: mark placeholder test used to compile integration tests without running them",]
+markers = [
+    "compile: mark placeholder test used to compile integration tests without running them",
+]
 asyncio_mode = "auto"

 [tool.poetry.group.test]
@@ -48,11 +50,14 @@ optional = true
 [tool.poetry.group.dev]
 optional = true

+[tool.poetry.group.dev.dependencies]
+
 [tool.poetry.group.test.dependencies]
 pytest = "^7.4.3"
 pytest-asyncio = "^0.23.2"
 pytest-socket = "^0.7.0"
 pytest-watcher = "^0.3.4"
+langchain-tests = "^0.3.5"

 [tool.poetry.group.codespell.dependencies]
 codespell = "^2.2.6"
@@ -64,15 +69,3 @@ ruff = "^0.5"

 [tool.poetry.group.typing.dependencies]
 mypy = "^1.10"
-
-[tool.poetry.group.test.dependencies.langchain-core]
-path = "../../core"
-develop = true
-
-[tool.poetry.group.dev.dependencies.langchain-core]
-path = "../../core"
-develop = true
-
-[tool.poetry.group.typing.dependencies.langchain-core]
-path = "../../core"
-develop = true
--- a/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_chat_models.py
+++ b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_chat_models.py
@@ -1,64 +1,21 @@
 """Test Chat__ModuleName__ chat model."""

+from typing import Type
+
 from __module_name__.chat_models import Chat__ModuleName__
+from langchain_tests.integration_tests import ChatModelIntegrationTests


-def test_stream() -> None:
-    """Test streaming tokens from OpenAI."""
-    llm = Chat__ModuleName__()
+class TestChatParrotLinkIntegration(ChatModelIntegrationTests):
+    @property
+    def chat_model_class(self) -> Type[Chat__ModuleName__]:
+        return Chat__ModuleName__

-    for token in llm.stream("I'm Pickle Rick"):
-        assert isinstance(token.content, str)
-
-
-async def test_astream() -> None:
-    """Test streaming tokens from OpenAI."""
-    llm = Chat__ModuleName__()
-
-    async for token in llm.astream("I'm Pickle Rick"):
-        assert isinstance(token.content, str)
-
-
-async def test_abatch() -> None:
-    """Test streaming tokens from Chat__ModuleName__."""
-    llm = Chat__ModuleName__()
-
-    result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
-    for token in result:
-        assert isinstance(token.content, str)
-
-
-async def test_abatch_tags() -> None:
-    """Test batch tokens from Chat__ModuleName__."""
-    llm = Chat__ModuleName__()
-
-    result = await llm.abatch(
-        ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]}
-    )
-    for token in result:
-        assert isinstance(token.content, str)
-
-
-def test_batch() -> None:
-    """Test batch tokens from Chat__ModuleName__."""
-    llm = Chat__ModuleName__()
-
-    result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])
-    for token in result:
-        assert isinstance(token.content, str)
-
-
-async def test_ainvoke() -> None:
-    """Test invoke tokens from Chat__ModuleName__."""
-    llm = Chat__ModuleName__()
-
-    result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
-    assert isinstance(result.content, str)
-
-
-def test_invoke() -> None:
-    """Test invoke tokens from Chat__ModuleName__."""
-    llm = Chat__ModuleName__()
-
-    result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"]))
-    assert isinstance(result.content, str)
+    @property
+    def chat_model_params(self) -> dict:
+        # These should be parameters used to initialize your integration for testing
+        return {
+            "model": "bird-brain-001",
+            "temperature": 0,
+            "parrot_buffer_length": 50,
+        }
--- a/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_embeddings.py
+++ b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_embeddings.py
@@ -1,20 +1,16 @@
 """Test __ModuleName__ embeddings."""

+from typing import Type
+
 from __module_name__.embeddings import __ModuleName__Embeddings
+from langchain_tests.integration_tests import EmbeddingsIntegrationTests


-def test___module_name___embedding_documents() -> None:
-    """Test cohere embeddings."""
-    documents = ["foo bar"]
-    embedding = __ModuleName__Embeddings()
-    output = embedding.embed_documents(documents)
-    assert len(output) == 1
-    assert len(output[0]) > 0
+class TestParrotLinkEmbeddingsIntegration(EmbeddingsIntegrationTests):
+    @property
+    def embeddings_class(self) -> Type[__ModuleName__Embeddings]:
+        return __ModuleName__Embeddings

-
-def test___module_name___embedding_query() -> None:
-    """Test cohere embeddings."""
-    document = "foo bar"
-    embedding = __ModuleName__Embeddings()
-    output = embedding.embed_query(document)
-    assert len(output) > 0
+    @property
+    def embedding_model_params(self) -> dict:
+        return {"model": "nest-embed-001"}
--- a/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_llms.py
+++ b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_llms.py
@@ -1,64 +0,0 @@
-"""Test __ModuleName__LLM llm."""
-
-from __module_name__.llms import __ModuleName__LLM
-
-
-def test_stream() -> None:
-    """Test streaming tokens from OpenAI."""
-    llm = __ModuleName__LLM()
-
-    for token in llm.stream("I'm Pickle Rick"):
-        assert isinstance(token, str)
-
-
-async def test_astream() -> None:
-    """Test streaming tokens from OpenAI."""
-    llm = __ModuleName__LLM()
-
-    async for token in llm.astream("I'm Pickle Rick"):
-        assert isinstance(token, str)
-
-
-async def test_abatch() -> None:
-    """Test streaming tokens from __ModuleName__LLM."""
-    llm = __ModuleName__LLM()
-
-    result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
-    for token in result:
-        assert isinstance(token, str)
-
-
-async def test_abatch_tags() -> None:
-    """Test batch tokens from __ModuleName__LLM."""
-    llm = __ModuleName__LLM()
-
-    result = await llm.abatch(
-        ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]}
-    )
-    for token in result:
-        assert isinstance(token, str)
-
-
-def test_batch() -> None:
-    """Test batch tokens from __ModuleName__LLM."""
-    llm = __ModuleName__LLM()
-
-    result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])
-    for token in result:
-        assert isinstance(token, str)
-
-
-async def test_ainvoke() -> None:
-    """Test invoke tokens from __ModuleName__LLM."""
-    llm = __ModuleName__LLM()
-
-    result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
-    assert isinstance(result, str)
-
-
-def test_invoke() -> None:
-    """Test invoke tokens from __ModuleName__LLM."""
-    llm = __ModuleName__LLM()
-
-    result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"]))
-    assert isinstance(result, str)
--- a/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_retrievers.py
+++ b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_retrievers.py
@@ -0,0 +1,24 @@
+from typing import Type
+
+from __module_name__.retrievers import __ModuleName__Retriever
+from langchain_tests.integration_tests import (
+    RetrieversIntegrationTests,
+)
+
+
+class Test__ModuleName__Retriever(RetrieversIntegrationTests):
+    @property
+    def retriever_constructor(self) -> Type[__ModuleName__Retriever]:
+        """Return the retriever class to instantiate for the tests."""
+        return __ModuleName__Retriever
+
+    @property
+    def retriever_constructor_params(self) -> dict:
+        return {"k": 2}
+
+    @property
+    def retriever_query_example(self) -> str:
+        """
+        Returns a string representing the "query" of an example retriever call.
+        """
+        return "example query"
--- a/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_tools.py
+++ b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_tools.py
@@ -0,0 +1,27 @@
+from typing import Type
+
+from __module_name__.tools import __ModuleName__Tool
+from langchain_tests.integration_tests import ToolsIntegrationTests
+
+
+class TestParrotMultiplyToolIntegration(ToolsIntegrationTests):
+    @property
+    def tool_constructor(self) -> Type[__ModuleName__Tool]:
+        return __ModuleName__Tool
+
+    @property
+    def tool_constructor_params(self) -> dict:
+        # if your tool constructor instead required initialization arguments like
+        # `def __init__(self, some_arg: int):`, you would return those here
+        # as a dictionary, e.g.: `return {'some_arg': 42}`
+        return {}
+
+    @property
+    def tool_invoke_params_example(self) -> dict:
+        """
+        Returns a dictionary representing the "args" of an example tool call.
+
+        This should NOT be a ToolCall dict - i.e. it should not
+        have {"name", "id", "args"} keys.
+        """
+        return {"a": 2, "b": 3}
--- a/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_vectorstores.py
+++ b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_vectorstores.py
@@ -0,0 +1,37 @@
+from typing import AsyncGenerator, Generator
+
+import pytest
+from __module_name__.vectorstores import __ModuleName__VectorStore
+from langchain_core.vectorstores import VectorStore
+from langchain_tests.integration_tests import (
+    AsyncReadWriteTestSuite,
+    ReadWriteTestSuite,
+)
+
+
+class Test__ModuleName__VectorStoreSync(ReadWriteTestSuite):
+    @pytest.fixture()
+    def vectorstore(self) -> Generator[VectorStore, None, None]:  # type: ignore
+        """Get an empty vectorstore for integration tests."""
+        store = __ModuleName__VectorStore()
+        # note: store should be EMPTY at this point
+        # if you need to delete data, you may do so here
+        try:
+            yield store
+        finally:
+            # cleanup operations, or deleting data
+            pass
+
+
+class Test__ModuleName__VectorStoreAsync(AsyncReadWriteTestSuite):
+    @pytest.fixture()
+    async def vectorstore(self) -> AsyncGenerator[VectorStore, None]:  # type: ignore
+        """Get an empty vectorstore for integration tests."""
+        store = __ModuleName__VectorStore()
+        # note: store should be EMPTY at this point
+        # if you need to delete data, you may do so here
+        try:
+            yield store
+        finally:
+            # cleanup operations, or deleting data
+            pass
--- a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_chat_models.py
+++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_chat_models.py
@@ -1,8 +1,21 @@
 """Test chat model integration."""

+from typing import Type
+
 from __module_name__.chat_models import Chat__ModuleName__
+from langchain_tests.unit_tests import ChatModelUnitTests


-def test_initialization() -> None:
-    """Test chat model initialization."""
-    Chat__ModuleName__()
+class TestChat__ModuleName__Unit(ChatModelUnitTests):
+    @property
+    def chat_model_class(self) -> Type[Chat__ModuleName__]:
+        return Chat__ModuleName__
+
+    @property
+    def chat_model_params(self) -> dict:
+        # These should be parameters used to initialize your integration for testing
+        return {
+            "model": "bird-brain-001",
+            "temperature": 0,
+            "parrot_buffer_length": 50,
+        }
--- a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_embeddings.py
+++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_embeddings.py
@@ -1,8 +1,16 @@
 """Test embedding model integration."""

+from typing import Type
+
 from __module_name__.embeddings import __ModuleName__Embeddings
+from langchain_tests.unit_tests import EmbeddingsUnitTests


-def test_initialization() -> None:
-    """Test embedding model initialization."""
-    __ModuleName__Embeddings()
+class TestParrotLinkEmbeddingsUnit(EmbeddingsUnitTests):
+    @property
+    def embeddings_class(self) -> Type[__ModuleName__Embeddings]:
+        return __ModuleName__Embeddings
+
+    @property
+    def embedding_model_params(self) -> dict:
+        return {"model": "nest-embed-001"}
--- a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_imports.py
+++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_imports.py
@@ -1,12 +0,0 @@
-from __module_name__ import __all__
-
-EXPECTED_ALL = [
-    "__ModuleName__LLM",
-    "Chat__ModuleName__",
-    "__ModuleName__VectorStore",
-    "__ModuleName__Embeddings",
-]
-
-
-def test_all_imports() -> None:
-    assert sorted(EXPECTED_ALL) == sorted(__all__)
--- a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_llms.py
+++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_llms.py
@@ -1,8 +0,0 @@
-"""Test __ModuleName__ Chat API wrapper."""
-
-from __module_name__ import __ModuleName__LLM
-
-
-def test_initialization() -> None:
-    """Test integration initialization."""
-    __ModuleName__LLM()
--- a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_tools.py
+++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_tools.py
@@ -0,0 +1,27 @@
+from typing import Type
+
+from __module_name__.tools import __ModuleName__Tool
+from langchain_tests.unit_tests import ToolsUnitTests
+
+
+class TestParrotMultiplyToolUnit(ToolsUnitTests):
+    @property
+    def tool_constructor(self) -> Type[__ModuleName__Tool]:
+        return __ModuleName__Tool
+
+    @property
+    def tool_constructor_params(self) -> dict:
+        # if your tool constructor instead required initialization arguments like
+        # `def __init__(self, some_arg: int):`, you would return those here
+        # as a dictionary, e.g.: `return {'some_arg': 42}`
+        return {}
+
+    @property
+    def tool_invoke_params_example(self) -> dict:
+        """
+        Returns a dictionary representing the "args" of an example tool call.
+
+        This should NOT be a ToolCall dict - i.e. it should not
+        have {"name", "id", "args"} keys.
+        """
+        return {"a": 2, "b": 3}
--- a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_vectorstores.py
+++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_vectorstores.py
@@ -1,6 +0,0 @@
-from __module_name__.vectorstores import __ModuleName__VectorStore
-
-
-def test_initialization() -> None:
-    """Test integration vectorstore initialization."""
-    __ModuleName__VectorStore()
--- a/libs/cli/langchain_cli/namespaces/integration.py
+++ b/libs/cli/langchain_cli/namespaces/integration.py
@@ -6,7 +6,7 @@ import re
 import shutil
 import subprocess
 from pathlib import Path
-from typing import Optional
+from typing import Dict, Optional, cast

 import typer
 from typing_extensions import Annotated, TypedDict
@@ -15,19 +15,17 @@ from langchain_cli.utils.find_replace import replace_file, replace_glob

 integration_cli = typer.Typer(no_args_is_help=True, add_completion=False)

-Replacements = TypedDict(
-    "Replacements",
-    {
-        "__package_name__": str,
-        "__module_name__": str,
-        "__ModuleName__": str,
-        "__MODULE_NAME__": str,
-        "__package_name_short__": str,
-    },
-)
+
+class Replacements(TypedDict):
+    __package_name__: str
+    __module_name__: str
+    __ModuleName__: str
+    __MODULE_NAME__: str
+    __package_name_short__: str
+    __package_name_short_snake__: str


-def _process_name(name: str, *, community: bool = False):
+def _process_name(name: str, *, community: bool = False) -> Replacements:
    preprocessed = name.replace("_", "-").lower()

    if preprocessed.startswith("langchain-"):
@@ -42,7 +40,7 @@ def _process_name(name: str, *, community: bool = False):
        raise ValueError("Name should not end with `-`.")
    if preprocessed.find("--") != -1:
        raise ValueError("Name should not contain consecutive hyphens.")
-    replacements = {
+    replacements: Replacements = {
        "__package_name__": f"langchain-{preprocessed}",
        "__module_name__": "langchain_" + preprocessed.replace("-", "_"),
        "__ModuleName__": preprocessed.title().replace("-", ""),
@@ -52,7 +50,7 @@ def _process_name(name: str, *, community: bool = False):
    }
    if community:
        replacements["__module_name__"] = preprocessed.replace("-", "_")
-    return Replacements(replacements)
+    return replacements


@integration_cli.command()
@@ -71,19 +69,25 @@ def new(
            " This is used to name classes like `MyIntegrationVectorStore`"
        ),
    ] = None,
+    src: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="The name of the single template file to copy."
+            " e.g. `--src integration_template/chat_models.py "
+            "--dst my_integration/chat_models.py`. Can be used multiple times.",
+        ),
+    ] = None,
+    dst: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="The relative path to the integration package to place the new file in"
+            ". e.g. `my-integration/my_integration.py`",
+        ),
+    ] = None,
 ):
    """
    Creates a new integration package.
-
-    Should be run from libs/partners
    """
-    # confirm that we are in the right directory
-    if not Path.cwd().name == "partners" or not Path.cwd().parent.name == "libs":
-        typer.echo(
-            "This command should be run from the `libs/partners` directory in the "
-            "langchain-ai/langchain monorepo. Continuing is NOT recommended."
-        )
-        typer.confirm("Are you sure you want to continue?", abort=True)

    try:
        replacements = _process_name(name)
@@ -104,27 +108,66 @@ def new(
            "Name of integration in PascalCase", default=replacements["__ModuleName__"]
        )

-    destination_dir = Path.cwd() / replacements["__package_name_short__"]
-    if destination_dir.exists():
-        typer.echo(f"Folder {destination_dir} exists.")
-        raise typer.Exit(code=1)
-
-    # copy over template from ../integration_template
    project_template_dir = Path(__file__).parents[1] / "integration_template"
-    shutil.copytree(project_template_dir, destination_dir, dirs_exist_ok=False)
+    destination_dir = Path.cwd() / replacements["__package_name__"]
+    if not src and not dst:
+        if destination_dir.exists():
+            typer.echo(f"Folder {destination_dir} exists.")
+            raise typer.Exit(code=1)

-    # folder movement
-    package_dir = destination_dir / replacements["__module_name__"]
-    shutil.move(destination_dir / "integration_template", package_dir)
+        # copy over template from ../integration_template
+        shutil.copytree(project_template_dir, destination_dir, dirs_exist_ok=False)

-    # replacements in files
-    replace_glob(destination_dir, "**/*", replacements)
+        # folder movement
+        package_dir = destination_dir / replacements["__module_name__"]
+        shutil.move(destination_dir / "integration_template", package_dir)

-    # poetry install
-    subprocess.run(
-        ["poetry", "install", "--with", "lint,test,typing,test_integration"],
-        cwd=destination_dir,
-    )
+        # replacements in files
+        replace_glob(destination_dir, "**/*", cast(Dict[str, str], replacements))
+
+        # poetry install
+        subprocess.run(
+            ["poetry", "install", "--with", "lint,test,typing,test_integration"],
+            cwd=destination_dir,
+        )
+    else:
+        # confirm src and dst are the same length
+        if not src:
+            typer.echo("Cannot provide --dst without --src.")
+            raise typer.Exit(code=1)
+        src_paths = [project_template_dir / p for p in src]
+        if dst and len(src) != len(dst):
+            typer.echo("Number of --src and --dst arguments must match.")
+            raise typer.Exit(code=1)
+        if not dst:
+            # assume we're in a package dir, copy to equivalent path
+            dst_paths = [destination_dir / p for p in src]
+        else:
+            dst_paths = [Path.cwd() / p for p in dst]
+            dst_paths = [
+                p / f"{replacements['__package_name_short_snake__']}.ipynb"
+                if not p.suffix
+                else p
+                for p in dst_paths
+            ]
+
+        # confirm no duplicate dst_paths
+        if len(dst_paths) != len(set(dst_paths)):
+            typer.echo(
+                "Duplicate destination paths provided or computed - please "
+                "specify them explicitly with --dst."
+            )
+            raise typer.Exit(code=1)
+
+        # confirm no files exist at dst_paths
+        for dst_path in dst_paths:
+            if dst_path.exists():
+                typer.echo(f"File {dst_path} exists.")
+                raise typer.Exit(code=1)
+
+        for src_path, dst_path in zip(src_paths, dst_paths):
+            shutil.copy(src_path, dst_path)
+            replace_file(dst_path, cast(Dict[str, str], replacements))


 TEMPLATE_MAP: dict[str, str] = {
@@ -187,43 +230,15 @@ def create_doc(
    """
    Creates a new integration doc.
    """
-    try:
-        replacements = _process_name(name, community=component_type == "Tool")
-    except ValueError as e:
-        typer.echo(e)
-        raise typer.Exit(code=1)
-
-    if name_class:
-        if not re.match(r"^[A-Z][a-zA-Z0-9]*$", name_class):
-            typer.echo(
-                "Name should only contain letters (a-z, A-Z), numbers, and underscores"
-                ", and start with a capital letter."
-            )
-            raise typer.Exit(code=1)
-        replacements["__ModuleName__"] = name_class
-    else:
-        replacements["__ModuleName__"] = typer.prompt(
-            (
-                "The PascalCase name of the integration (e.g. `OpenAI`, `VertexAI`). "
-                "Do not include a 'Chat', 'VectorStore', etc. prefix/suffix."
-            ),
-            default=replacements["__ModuleName__"],
-        )
-    destination_path = (
-        Path.cwd()
-        / destination_dir
-        / (replacements["__package_name_short_snake__"] + ".ipynb")
-    )
-
-    # copy over template from ../integration_template
-    template_dir = Path(__file__).parents[1] / "integration_template" / "docs"
-    if component_type in TEMPLATE_MAP:
-        docs_template = template_dir / TEMPLATE_MAP[component_type]
-    else:
-        raise ValueError(
+    if component_type not in TEMPLATE_MAP:
+        typer.echo(
            f"Unrecognized {component_type=}. Expected one of {_component_types_str}."
        )
-    shutil.copy(docs_template, destination_path)
+        raise typer.Exit(code=1)

-    # replacements in file
-    replace_file(destination_path, replacements)
+    new(
+        name=name,
+        name_class=name_class,
+        src=[f"docs/{TEMPLATE_MAP[component_type]}"],
+        dst=[destination_dir],
+    )
--- a/libs/cli/langchain_cli/namespaces/migrate/generate/utils.py
+++ b/libs/cli/langchain_cli/namespaces/migrate/generate/utils.py
@@ -17,7 +17,7 @@ PARTNER_PKGS = PKGS_ROOT / "partners"
 class ImportExtractor(ast.NodeVisitor):
    def __init__(self, *, from_package: Optional[str] = None) -> None:
        """Extract all imports from the given code, optionally filtering by package."""
-        self.imports = []
+        self.imports: list = []
        self.package = from_package

    def visit_ImportFrom(self, node):
@@ -68,7 +68,7 @@ def find_subclasses_in_module(module, classes_: List[Type]) -> List[str]:
    return subclasses


-def _get_all_classnames_from_file(file: str, pkg: str) -> List[Tuple[str, str]]:
+def _get_all_classnames_from_file(file: Path, pkg: str) -> List[Tuple[str, str]]:
    """Extract all class names from a file."""
    with open(file, encoding="utf-8") as f:
        code = f.read()
@@ -145,7 +145,7 @@ def find_imports_from_package(
    return extractor.imports


-def _get_current_module(path: str, pkg_root: str) -> str:
+def _get_current_module(path: Path, pkg_root: str) -> str:
    """Convert a path to a module name."""
    path_as_pathlib = pathlib.Path(os.path.abspath(path))
    relative_path = path_as_pathlib.relative_to(pkg_root).with_suffix("")
--- a/libs/cli/langchain_cli/namespaces/migrate/main.py
+++ b/libs/cli/langchain_cli/namespaces/migrate/main.py
@@ -4,7 +4,7 @@ from pathlib import Path

 import rich
 import typer
-from gritql import run
+from gritql import run  # type: ignore
 from typer import Option


--- a/libs/cli/langchain_cli/utils/find_replace.py
+++ b/libs/cli/langchain_cli/utils/find_replace.py
@@ -13,7 +13,7 @@ def find_and_replace(source: str, replacements: Dict[str, str]) -> str:
    return rtn


-def replace_file(source: Path, replacements: Dict[str, str]) -> None:
+def replace_file(source: Path, replacements: dict[str, str]) -> None:
    try:
        content = source.read_text()
    except UnicodeDecodeError:
@@ -24,7 +24,7 @@ def replace_file(source: Path, replacements: Dict[str, str]) -> None:
        source.write_text(new_content)


-def replace_glob(parent: Path, glob: str, replacements: Dict[str, str]) -> None:
+def replace_glob(parent: Path, glob: str, replacements: dict[str, str]) -> None:
    for file in parent.glob(glob):
        if not file.is_file():
            continue
--- a/libs/cli/poetry.lock
+++ b/libs/cli/poetry.lock
--- a/libs/cli/pyproject.toml
+++ b/libs/cli/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain-cli"
-version = "0.0.33"
+version = "0.0.35"
 description = "CLI for interacting with LangChain"
 authors = ["Erick Friis <erick@langchain.dev>"]
 readme = "README.md"
@@ -25,16 +25,18 @@ langchain = "langchain_cli.cli:app"
 langchain-cli = "langchain_cli.cli:app"

 [tool.poetry.group.dev.dependencies]
-poethepoet = "^0.24.1"
 pytest = "^7.4.2"
 pytest-watch = "^4.2.0"

 [tool.poetry.group.lint.dependencies]
 ruff = "^0.5"
+mypy = "^1.13.0"

 [tool.poetry.group.test.dependencies]
+langchain = {path = "../langchain", develop = true}

 [tool.poetry.group.typing.dependencies]
+langchain = {path = "../langchain", develop = true}

 [tool.poetry.group.test_integration.dependencies]

@@ -50,22 +52,11 @@ select = [
  "T201", # print
 ]

-[tool.poe.tasks]
-test = "poetry run pytest tests"
-watch = "poetry run ptw"
-version = "poetry version --short"
-bump = ["_bump_1", "_bump_2"]
-lint = ["_lint", "_check_formatting"]
-format = ["_format", "_lint_fix"]
-
-_bump_2.shell = """sed -i "" "/^__version__ =/c\\ \n__version__ = \\"$version\\"\n" langchain_cli/cli.py"""
-_bump_2.uses = { version = "version" }
-
-_bump_1 = "poetry version patch"
-_check_formatting = "poetry run ruff format . --diff"
-_lint = "poetry run ruff check ."
-_format = "poetry run ruff format ."
-_lint_fix = "poetry run ruff check . --fix"
+[tool.mypy]
+exclude = [
+  "langchain_cli/integration_template",
+  "langchain_cli/package_template",
+]

 [build-system]
 requires = ["poetry-core"]
--- a/libs/cli/scripts/generate_migrations.py
+++ b/libs/cli/scripts/generate_migrations.py
@@ -1,3 +1,4 @@
+# type: ignore
 """Script to generate migrations for the migration script."""

 import json
--- a/libs/cli/tests/integration_tests/test_compile.py
+++ b/libs/cli/tests/integration_tests/test_compile.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+@pytest.mark.compile
+def test_placeholder() -> None:
+    """Used for compiling integration tests without running any real tests."""
+    pass
--- a/libs/cli/tests/unit_tests/migrate/cli_runner/test_cli.py
+++ b/libs/cli/tests/unit_tests/migrate/cli_runner/test_cli.py
@@ -41,6 +41,7 @@ def find_issue(current: Folder, expected: Folder) -> str:
    return "Unknown"


+@pytest.mark.xfail(reason="grit may not be installed in env")
 def test_command_line(tmp_path: Path) -> None:
    runner = CliRunner()

--- a/libs/community/README.md
+++ b/libs/community/README.md
@@ -13,7 +13,7 @@ pip install langchain-community

 LangChain Community contains third-party integrations that implement the base interfaces defined in LangChain Core, making them ready-to-use in any LangChain application.

-For full documentation see the [API reference](https://api.python.langchain.com/en/stable/community_api_reference.html).
+For full documentation see the [API reference](https://python.langchain.com/api_reference/community/index.html).

 ![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](https://raw.githubusercontent.com/langchain-ai/langchain/e1d113ea84a2edcf4a7709fc5be0e972ea74a5d9/docs/static/svg/langchain_stack_112024.svg "LangChain Framework Overview")

--- a/libs/community/extended_testing_deps.txt
+++ b/libs/community/extended_testing_deps.txt
@@ -46,6 +46,7 @@ motor>=3.3.1,<4
 msal>=1.25.0,<2
 mwparserfromhell>=0.6.4,<0.7
 mwxml>=0.3.3,<0.4
+needle-python>=0.4
 networkx>=3.2.1,<4
 newspaper3k>=0.2.8,<0.3
 numexpr>=2.8.6,<3
--- a/libs/community/langchain_community/adapters/openai.py
+++ b/libs/community/langchain_community/adapters/openai.py
@@ -91,6 +91,8 @@ def convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
            additional_kwargs["function_call"] = dict(function_call)
        if tool_calls := _dict.get("tool_calls"):
            additional_kwargs["tool_calls"] = tool_calls
+        if context := _dict.get("context"):
+            additional_kwargs["context"] = context
        return AIMessage(content=content, additional_kwargs=additional_kwargs)
    elif role == "system":
        return SystemMessage(content=_dict.get("content", ""))
@@ -135,6 +137,11 @@ def convert_message_to_dict(message: BaseMessage) -> dict:
            # If tool calls only, content is None not empty string
            if message_dict["content"] == "":
                message_dict["content"] = None
+        if "context" in message.additional_kwargs:
+            message_dict["context"] = message.additional_kwargs["context"]
+            # If context only, content is None not empty string
+            if message_dict["content"] == "":
+                message_dict["content"] = None
    elif isinstance(message, SystemMessage):
        message_dict = {"role": "system", "content": message.content}
    elif isinstance(message, FunctionMessage):
--- a/Show More
+++ b/Show More