Compare commits

...

120 Commits

Author SHA1 Message Date
ccurme
092697de60 infra: temporarily drop OpenAI from core release test matrix (#31318)
As part of core releases we run tests on the last released version of
some packages (including langchain-openai) using the new version of
langchain-core. We run langchain-openai's test suite as it was when it
was last released.

OpenAI has since updated their API, relaxing constraints on what schemas
are supported when `strict=True`, causing these tests to break. The tests
have since been fixed, but the previously released versions of them will
continue to fail.

Will revert this change after we release OpenAI today.
2025-05-22 16:07:45 +00:00
ccurme
71c074d28f core: release 0.3.61 (#31317) 2025-05-22 11:54:28 -04:00
ccurme
053a1246da openai[patch]: support built-in code interpreter and remote MCP tools (#31304) 2025-05-22 11:47:57 -04:00
ccurme
1b5ffe4107 openai[patch]: run _tokenize in background thread in async embedding invocations (#31312) 2025-05-22 10:27:33 -04:00
Ishan Goswami
f16456139b exa docs and python package update (#31307)
Added support for new Exa API features. Updated Exa docs and python
package (langchain-exa).

Description

Added support for new Exa API features in the langchain-exa package:
- Added max_characters option for text content
- Added support for summary and custom summary prompts
- Added livecrawl option with "always", "fallback", "never" settings
- Added "auto" option for search type
- Updated documentation and tests

Dependencies
- No new dependencies required. Using existing features from exa-py.
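
A rough usage sketch of the options listed above (option names are taken from the feature list; whether they are top-level fields on `ExaSearchRetriever` or nested is an assumption, not from this PR's diff):
```python
# Hedged sketch: assumes the updated langchain-exa exposes these options
# directly on ExaSearchRetriever; check the langchain-exa docs for exact names.
from langchain_exa import ExaSearchRetriever

retriever = ExaSearchRetriever(
    k=5,
    type="auto",           # new "auto" search type
    livecrawl="fallback",   # "always" | "fallback" | "never"
    summary=True,           # request a summary of each result
    max_characters=2000,    # limit text content length
)
docs = retriever.invoke("Latest research on sparse mixture-of-experts models")
```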

twitter: @theishangoswami

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-21 21:33:30 -04:00
Mason Daugherty
cf1fa27e27 docs: correct groq description typo (#31308)
**Description:** a space was missing between the words "Groq" and
"developed" and has been added
2025-05-21 21:23:37 -04:00
ccurme
beacedd6b3 openai[patch]: update tests for strict schemas (#31306)
Following recent [changes](https://platform.openai.com/docs/changelog).
2025-05-21 22:06:17 +00:00
Vikram Saraph
53d6286539 Fix link to deprecation alternative for ConversationChain in docs (#31299)
**Description:** ConversationChain has been deprecated, and the
documentation says to use RunnableWithMessageHistory in its place, but
the link at the top of the page to RunnableWithMessageHistory is broken
(it's rendering as "html()"). See here at the top of the page:
https://python.langchain.com/api_reference/langchain/chains/langchain.chains.conversation.base.ConversationChain.html.
This PR fixes the link.
**Issue**: N/A
**Dependencies**: N/A
**Twitter handle:** If you're on Bluesky, I'm @vikramsaraph.com
2025-05-21 09:31:06 -04:00
Ako
7b45d46210 ci: fix typo in doc-string (#31284)
Fix typo

---------

Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
2025-05-20 20:52:18 +00:00
Shaya Ulman
580fc7d464 docs: fix broken link (#31294)
As stated
[here](https://github.com/langchain-ai/langchain/blob/master/libs/community/README.md)
this package has moved, hence the broken link
2025-05-20 16:49:50 -04:00
Mert Arcan
6993bc9ad1 typo fix (#31295)
Typo fix: `%pip install -upgrade langchain-google` changed to
`%pip install --upgrade langchain-google`.
2025-05-20 16:49:21 -04:00
ccurme
dcb5aba999 openai[patch]: reduce tested constraints on strict schema adherence for Responses API (#31290)
Scheduled testing started failing today because the Responses API
stopped raising `BadRequestError` for a schema that was previously
invalid when `strict=True`.

Although docs still say that [some type-specific keywords are not yet
supported](https://platform.openai.com/docs/guides/structured-outputs#some-type-specific-keywords-are-not-yet-supported)
(including `minimum` and `maximum` for numbers), the below appears to
run and correctly respect the constraints:
```python
import json
import openai

maximums = list(range(1, 11))
arg_values = []
for maximum in maximums:

    tool = {
        "type": "function",
        "name": "magic_function",
        "description": "Applies a magic function to an input.",
        "parameters": {
            "properties": {
                "input": {"maximum": maximum, "minimum": 0, "type": "integer"}
            },
            "required": ["input"],
            "type": "object",
            "additionalProperties": False
        },
        "strict": True
    }
    
    client = openai.OpenAI()
    
    response = client.responses.create(
        model="gpt-4.1",
        input=[{"role": "user", "content": "What is the value of magic_function(3)? Use the tool."}],
        tools=[tool],
    )
    function_call = next(item for item in response.output if item.type == "function_call")
    args = json.loads(function_call.arguments)
    arg_values.append(args["input"])


print(maximums)
print(arg_values)

# [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# [1, 2, 3, 3, 3, 3, 3, 3, 3, 3]
```
Until yesterday this raised BadRequestError.

The same is not true of Chat Completions, which appears to still raise
`BadRequestError`:
```python
tool = {
    "type": "function",
    "function": {
        "name": "magic_function",
        "description": "Applies a magic function to an input.",
        "parameters": {
            "properties": {
                "input": {"maximum": 5, "minimum": 0, "type": "integer"}
            },
            "required": ["input"],
            "type": "object",
            "additionalProperties": False
        },
        "strict": True
    }
}

response = client.chat.completions.create(
    model="gpt-4.1",
    messages=[{"role": "user", "content": "What is the value of magic_function(3)? Use the tool."}],
    tools=[tool],
)
response  # raises BadRequestError
```

Here we update tests accordingly.
2025-05-20 14:50:31 +00:00
Ahmad Elmalah
f29659728c Docs: Fixing a typo in retrievers concepts page (#31288)
The second 'is' is redundant and makes the meaning confusing.
2025-05-20 09:08:33 -04:00
Michael Li
916768e3c1 doc: replace initialize agent in steam.ipynb (#31283)
2025-05-19 19:21:35 -04:00
Himanshu Sharma
ff12555bdc docs: clarify hub.pull behavior with non-US LangSmith endpoints (#31270)
**Description**:

This PR updates the documentation to address a potential issue when
using `hub.pull(...)` with non-US LangSmith endpoints (e.g.,
`https://eu.api.smith.langchain.com`).

By default, the `hub.pull` function assumes the non-US-based API URL.
When the `LANGSMITH_ENDPOINT` environment variable is set to a non-US
region, this can lead to `LangSmithNotFoundError` (404 not found) errors
when pulling public assets from the LangChain Hub.

Issue: #31191
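
For readers hitting this, a minimal sketch of one workaround (assuming `hub.pull` accepts an `api_url` override; this snippet is illustrative and not part of the PR's diff):
```python
# Hedged sketch: when LANGSMITH_ENDPOINT points at a non-US region,
# explicitly target the US endpoint to pull public Hub assets.
from langchain import hub

prompt = hub.pull(
    "rlm/rag-prompt",
    api_url="https://api.smith.langchain.com",  # US endpoint hosting public Hub assets
)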

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-19 23:06:37 +00:00
Michael Li
0c6137ec2b docs: replace deprecated load_tools in google_jobs.ipynb (#31272)
2025-05-19 18:58:31 -04:00
ccurme
bf645c83f4 voyageai: remove from monorepo (#31281)
langchain-voyageai is now maintained at
https://github.com/voyage-ai/langchain-voyageai.
2025-05-19 16:33:38 +00:00
DarinVerheijke
49fbcec34f community: add Featherless.ai integration (#31250)
Update docs to add Featherless.ai Provider & Chat Model
- **Description:** Adding Featherless.ai as a provider in the
documentation, giving access to over 4,300 open-source models
- **Twitter handle:** https://x.com/FeatherlessAI
2025-05-19 10:40:25 -04:00
ccurme
32fcc97a90 openai[patch]: compat with Bedrock Converse (#31280)
ChatBedrockConverse passes through reasoning content blocks in [Bedrock
Converse
format](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ContentBlock.html).

Similar to how we handle Anthropic thinking blocks, here we ensure these
are filtered out of OpenAI request payloads.

Resolves https://github.com/langchain-ai/langchain/issues/31279.
2025-05-19 10:35:26 -04:00
Xu Song
8b6fec89bc docs: Update flashrank-reranker.ipynb (#31274)
- **Description:** Fix import in doc; `FlashrankRerank` has been moved
to `langchain_community`
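
For reference, the corrected import looks like this (minimal sketch; the compressor still requires the `flashrank` package to be installed):
```python
# FlashrankRerank now lives in langchain_community
from langchain_community.document_compressors import FlashrankRerank

compressor = FlashrankRerank(top_n=3)
```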
2025-05-19 10:11:15 -04:00
ccurme
ea1f9e2d5b docs: remove integration page with DSPy (#31278)
DSPy removed their LangChain integration in version 2.6.6.

Here we remove the page and add a redirect to the LangChain v0.2 docs
for posterity.

We add an admonition to the v0.2 docs in
https://github.com/langchain-ai/langchain/pull/31277.
2025-05-19 10:10:20 -04:00
Christophe Bornet
17c5a1621f core: Improve Runnable __or__ method typing annotations (#31273)
* It is possible to chain a `Runnable` with an `AsyncIterator`, as seen
in `test_runnable.py` (see the sketch below).
* Iterator and AsyncIterator Input/Output of Callables must be put
before `Callable[[Other], Any]`, otherwise the pattern matching picks the
latter.
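
A small sketch of the kind of composition the improved annotations cover (illustrative only, not taken from the PR's test file):
```python
# An async-generator callable piped onto a Runnable via __or__ is coerced
# into a RunnableGenerator; the typing change lets this compose cleanly.
from collections.abc import AsyncIterator

from langchain_core.runnables import RunnableLambda

async def shout(chunks: AsyncIterator[str]) -> AsyncIterator[str]:
    async for chunk in chunks:
        yield chunk.upper()

chain = RunnableLambda(lambda text: text) | shout
# await chain.ainvoke("hello")  -> "HELLO"
```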
2025-05-19 09:32:31 -04:00
mathislindner
e1af509966 anthropic: emit informative error message if there are only system messages in a prompt (#30822)
**PR message**: Not sure if I put the check at the right spot, but I
thought throwing the error before the loop made sense to me.
**Description:** Checks whether only system messages are passed when
using the AnthropicChat model and throws an error if that is the case.
See the issue for more details.
**Issue:** #30764

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-16 20:43:59 +00:00
OysterMax
eb25d7472d core: support Union type args in strict mode of OpenAI function calling / structured output (#30971)
**Issue:** [#30970](https://github.com/langchain-ai/langchain/issues/30970)

**Cause**
An arg type in Python code
```
arg: Union[SubSchema1, SubSchema2]
```
is translated to `anyOf` in the **JSON schema**:
```
"anyOf" : [{sub schema 1 ...}, {sub schema 2 ...}]
```
The value of `anyOf` is a list of sub-schemas.
The bug is caused because the sub-schemas inside the `anyOf` list are not
taken care of.
The issue happens in the `convert_to_openai_function` function ->
`_recursive_set_additional_properties_false` function, which
recursively adds `"additionalProperties": false` to the JSON schema, as
[required by OpenAI's strict function
calling](https://platform.openai.com/docs/guides/structured-outputs?api-mode=responses#additionalproperties-false-must-always-be-set-in-objects).

**Solution:**
This PR fixes the issue by iterating over each sub-schema inside the
`anyOf` list.
A unit test is added.
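
A minimal repro-style sketch of the behavior this fixes (hypothetical schema names; the exact JSON-schema layout can vary with the pydantic version):
```python
from typing import Union

from pydantic import BaseModel
from langchain_core.utils.function_calling import convert_to_openai_function

class SubSchema1(BaseModel):
    a: int

class SubSchema2(BaseModel):
    b: str

class MagicFunction(BaseModel):
    """Applies a magic function to an input."""
    arg: Union[SubSchema1, SubSchema2]

fn = convert_to_openai_function(MagicFunction, strict=True)
# With the fix, object sub-schemas under "anyOf" also carry
# "additionalProperties": false, as OpenAI strict mode requires.
for sub in fn["parameters"]["properties"]["arg"]["anyOf"]:
    print(sub.get("additionalProperties"))
```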

**Twitter handle:** shengboma 


---------

Co-authored-by: ccurme <chester.curme@gmail.com>
2025-05-16 16:20:32 -04:00
Christophe Bornet
c982573f1e core: Add ruff rules A (builtins shadowing) (#29312)
See https://docs.astral.sh/ruff/rules/#flake8-builtins-a
* Renamed vars where possible
* Added `noqa` where backward compatibility was needed
* Added `@override` when applicable
2025-05-16 15:19:37 -04:00
Shkarupa Alex
671e4fd114 langchain[patch]: Allow async indexing code to work for vectorstores that only defined sync delete (#30869)
The `aindex` function should check not only the `adelete` method, but the
`delete` method too.

**PR title**: "core: fix async indexing issue with adelete/delete
checking"
**PR message**: Currently `langchain.indexes.aindex` checks whether the
vector store has overridden the `adelete` method. But due to the default
`adelete` implementation, a store can have just `delete` overridden and
still have a working `adelete`.
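
A rough sketch of the capability check being described (illustrative only, not the actual `aindex` source):
```python
# Hypothetical helper: a vector store is usable for deletion if it overrides
# either adelete (async) or delete (sync), since the default adelete
# implementation delegates to delete.
from langchain_core.vectorstores import VectorStore

def supports_deletion(vector_store: VectorStore) -> bool:
    overrides_adelete = type(vector_store).adelete is not VectorStore.adelete
    overrides_delete = type(vector_store).delete is not VectorStore.delete
    return overrides_adelete or overrides_delete
```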

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
2025-05-16 15:10:25 -04:00
Shashank Reddy Boosi
bd367ba10c docs: Update confluence code blocks with all the latest changes and update documentation (#31242)
Description: This document change concerns the document-loader
integration, specifically `Confluence`.

I was trying to use the ConfluenceLoader and came across deprecations
when I followed the instructions in the documentation, so I updated the
code blocks with the latest changes made to LangChain and also updated
the documentation for better readability.
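
For context, a minimal sketch of the non-deprecated usage the updated code blocks move toward (constructor-based parameters are an assumption here; check the Confluence integration page for the exact arguments):
```python
# Hedged sketch: newer ConfluenceLoader versions take connection and
# filtering options in the constructor rather than in .load().
from langchain_community.document_loaders import ConfluenceLoader

loader = ConfluenceLoader(
    url="https://yoursite.atlassian.net/wiki",
    username="me@example.com",
    api_key="...",          # Atlassian API token
    space_key="SPACE",
    limit=50,
)
docs = loader.load()
```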

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
2025-05-16 09:47:06 -04:00
Baitur Ulukbekov
1f43b6062e Fix link SQL: Agents to proper destination in sql_large_db.ipynb (#31255)

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-16 12:52:09 +00:00
Eugene Yurtsev
c178ad87b6 docs: remove docs that disable ssl verification (#31254)
Remove examples that disable ssl verification
2025-05-15 17:21:22 -04:00
mateencog
12b063eb67 Fix: Extend pyproject.toml check in create_api_rst.py to langchain/libs folder (#31128)
- **Description:** The file `docs/api_reference/create_api_rst.py`
uses a pyproject.toml check to remove partners which don't have a valid
pyproject.toml. This PR extends that check to the `/langchain/libs/*`
sub-directories as well. Without this, the `make api_docs_build`
command fails (see error).
- **Issue:** #31109
- **Dependencies:** none
- **Error Traceback:**
```
uv run --no-group test python docs/api_reference/create_api_rst.py
Starting to build API reference files.
Building package: community
pyproject.toml not found in /langchain/libs/community.
You are either attempting to build a directory which is not a package or
the package is missing a pyproject.toml file which should be added.
Aborting the build.
make: *** [Makefile:35: api_docs_build] Error 1
```
2025-05-15 16:53:45 -04:00
ccurme
a401d7e52a ollama: release 0.3.3 (#31253) 2025-05-15 16:24:04 -04:00
Alexey Bondarenko
9efafe3337 ollama: Add separate kwargs parameter for async client (#31209)
**Description**:

Add a `async_client_kwargs` field to ollama chat/llm/embeddings adapters
that is passed to async httpx client constructor.

**Motivation:**

In my use-case:
- chat/embedding model adapters may be created frequently, sometimes to
be called just once or never to be called at all
- they may be used in both sync and async mode (not known at the moment
they are created)

So, I want to keep a static transport instance maintaining a connection
pool, so model adapters can be created and destroyed freely. But that
doesn't work when both sync and async functions are in use, as I can only
pass one transport instance for both the sync and async clients, while
the transport types must be different for them. So I can't make both sync
and async calls use a shared transport with the current model adapter
interfaces.

In this PR I add a separate `async_client_kwargs` field that gets passed
to the async client constructor, so it is possible to pass a separate
transport instance. For the sake of backwards compatibility, it is merged
with `client_kwargs`, so nothing changes when it is not set.
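
A sketch of the intended usage under the motivation above (assumes `ChatOllama` exposes the new `async_client_kwargs` field alongside the existing `client_kwargs`; the transport objects are illustrative):
```python
# Hedged sketch: pass separate httpx transports for the sync and async clients.
import httpx
from langchain_ollama import ChatOllama

sync_transport = httpx.HTTPTransport(retries=2)
async_transport = httpx.AsyncHTTPTransport(retries=2)

llm = ChatOllama(
    model="llama3.1",
    client_kwargs={"transport": sync_transport},         # shared/sync options
    async_client_kwargs={"transport": async_transport},  # async-only overrides
)
```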

I am unable to run linter right now, but the changes look ok.
2025-05-15 16:10:10 -04:00
dependabot[bot]
03adca6c44 build(deps): bump Ana06/get-changed-files from 2.2.0 to 2.3.0 (#31236)
Bumps [Ana06/get-changed-files](https://github.com/ana06/get-changed-files) from 2.2.0 to 2.3.0.

The action is a fork of [jitterbit/get-changed-files](https://github.com/jitterbit/get-changed-files) that supports `pull_request_target`, allows filtering files with regular expressions, removes the ahead check, treats renamed modified files as modified, and adds an `added_modified_renamed` output. Release notes for v2.3.0: update to Node 20 and update dependencies.
Full diff: https://github.com/Ana06/get-changed-files/compare/v2.2.0...v2.3.0

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-15 16:08:16 -04:00
ccurme
6bbc12b7f7 chroma: release 0.2.4 (#31252) 2025-05-15 15:58:29 -04:00
Jai Radhakrishnan
aa4890c136 partners: update deps for langchain-chroma (#31251)
Updates dependencies to Chroma to integrate the major release of Chroma
with improved performance, and to fix issues users have been seeing
using the latest chroma docker image with langchain-chroma

https://github.com/langchain-ai/langchain/issues/31047#issuecomment-2850790841
Updates chromadb dependency to >=1.0.9

This also removes the dependency of chroma-hnswlib, meaning it can run
against python 3.13 runners for tests as well.

Tested this by pulling the latest Chroma docker image, running
langchain-chroma using client mode
```
import chromadb
from langchain_chroma import Chroma

# `embeddings` is any LangChain embeddings instance (e.g. OpenAIEmbeddings())
http_client = chromadb.HttpClient(host="localhost", port=8000)

vector_store = Chroma(
    client=http_client,
    collection_name="test",
    embedding_function=embeddings,
)
```
2025-05-15 15:55:15 -04:00
Christophe Bornet
a8f2ddee31 core: Add ruff rules RUF (#29353)
See https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
Mostly:
* [RUF022](https://docs.astral.sh/ruff/rules/unsorted-dunder-all/)
(unsorted `__all__`)
* [RUF100](https://docs.astral.sh/ruff/rules/unused-noqa/) (unused noqa)
*
[RUF021](https://docs.astral.sh/ruff/rules/parenthesize-chained-operators/)
(parenthesize-chained-operators)
*
[RUF015](https://docs.astral.sh/ruff/rules/unnecessary-iterable-allocation-for-first-element/)
(unnecessary-iterable-allocation-for-first-element)
*
[RUF005](https://docs.astral.sh/ruff/rules/collection-literal-concatenation/)
(collection-literal-concatenation)
* [RUF046](https://docs.astral.sh/ruff/rules/unnecessary-cast-to-int/)
(unnecessary-cast-to-int)

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
2025-05-15 15:43:57 -04:00
Christophe Bornet
6cd1aadf60 langchain: use mypy strict checking with exemptions (#31018)
* Use strict checking and exclude some rules as TODOs
* Fix imports not exposed in `__all__`

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
2025-05-15 11:37:18 -04:00
Christophe Bornet
eab8484a80 text-splitters[patch]: fix some import-untyped errors (#31030) 2025-05-15 11:34:22 -04:00
ccurme
672339f3c6 core: release 0.3.60 (#31249) 2025-05-15 11:14:04 -04:00
renchao
6f2acbcf2e Update sql_large_db.ipynb (#31241)
"Alanis Morissette" spelling error

2025-05-15 11:07:51 -04:00
ccurme
8b145d5dc3 openai: release 0.3.17 (#31246) 2025-05-15 09:18:22 -04:00
dependabot[bot]
d4f77a8c8f build(deps): bump actions/setup-python from 3 to 5 (#31234)
Bumps [actions/setup-python](https://github.com/actions/setup-python) from 3 to 5.

Release highlights: v5.0.0 updates the action's runtime from node16 to node20 and bumps dependencies; v4.9.0 upgraded `actions/cache` to 4.0.3; v4.8.0 added support for GraalPy.
Full changelog: https://github.com/actions/setup-python/compare/v3...v5

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-14 15:48:33 -04:00
dependabot[bot]
71b71768bf build(deps): bump actions/setup-node from 3 to 4 (#31237)
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 3 to 4.

Release highlights: v4.0.0 changes the action's runtime from node16 to node20 and updates dependencies; v3.9.0 upgraded `@actions/cache` to 4.0.3; v3.8.x brought cache-save and path-handling fixes.
Full changelog: https://github.com/actions/setup-node/compare/v3...v4.0.0

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-14 14:43:18 -04:00
Christophe Bornet
921573e2b7 core: Add ruff rules SLF (#30666)
Add ruff rules SLF: https://docs.astral.sh/ruff/rules/#flake8-self-slf

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
2025-05-14 18:42:39 +00:00
dependabot[bot]
d8a7eda12e build(deps): bump astral-sh/setup-uv from 5 to 6 (#31235)
Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 5 to 6.

Release highlights (v6.0.0): venv activation is now controlled explicitly via the new `activate-environment` input (false by default) instead of being implied by `python-version`; a new `working-directory` input controls where `pyproject.toml`, `uv.toml`, and `.python-version` are looked up and where the venv is created (the `pyproject-file` and `uv-file` inputs were removed); the default `cache-dependency-glob` changed; and self-hosted runners no longer get a default `UV_CACHE_DIR`.
Full diff: https://github.com/astral-sh/setup-uv/compare/v5...v6

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-14 14:41:13 -04:00
Minh Nguyen
8af0dc5fd6 docs: Update langchain-anthropic version for tutorial with web search tool (#31240)
**Description:** This is a documentation change regarding the
`langchain-anthropic` integration for the newly released web search tool
([Claude doc](https://docs.anthropic.com/en/docs/build-with-claude/tool-use/web-search-tool)).

Issue 1: The sample in [Web Search
section](https://python.langchain.com/docs/integrations/chat/anthropic/#web-search)
did not run. You would get an error as below:
```
File "my_file.py", line 170, in call
    model_with_tools = model.bind_tools([websearch_tool])
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/langchain_anthropic/chat_models.py", line 1363, in bind_tools
    tool if _is_builtin_tool(tool) else convert_to_anthropic_tool(tool)
                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/langchain_anthropic/chat_models.py", line 1645, in convert_to_anthropic_tool
    input_schema=oai_formatted["parameters"],
                 ~~~~~~~~~~~~~^^^^^^^^^^^^^^
KeyError: 'parameters'
```
This is because the web search tool is only supported starting with
`langchain-anthropic==0.3.13`, with the [0.3.13
release](https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-anthropic%3D%3D0%22&expanded=true)
mentioning:
> anthropic[patch]: support web search
(https://github.com/langchain-ai/langchain/pull/31157)



Issue 2: The current doc has outdated package requirements for Websearch
tool: "This guide requires langchain-anthropic>=0.3.10".

Changes:
- Updated the required `langchain-anthropic` package version (0.3.10 ->
0.3.13).
- Added notes to user when using websearch sample.

I believe this will help avoid future confusion from readers.
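
For reference, a minimal sketch of the web search sample under the updated requirement (the tool dict follows Anthropic's server-tool format; treat the exact values as illustrative):
```python
# Requires langchain-anthropic>=0.3.13 (per the doc update above).
from langchain_anthropic import ChatAnthropic

websearch_tool = {
    "type": "web_search_20250305",  # Anthropic server-side web search tool
    "name": "web_search",
    "max_uses": 3,
}

model = ChatAnthropic(model="claude-3-5-sonnet-latest")
model_with_tools = model.bind_tools([websearch_tool])
response = model_with_tools.invoke("How do I update a web app to TypeScript 5.5?")
```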

**Issue:** N/A
**Dependencies:** N/A
**Twitter handle:** N/A
2025-05-14 14:19:32 -04:00
Sydney Runkle
7263011b24 perf[core]: remove unnecessary model validators (#31238)
* Remove unnecessary cast of id -> str (can do with a field setting)
* Remove unnecessary `set_text` model validator (can be done with a
computed field, though we had to make some changes to the `Generation`
class to make this possible)

Before: ~2.4s

Blue circles represent time spent in custom validators :(

<img width="1337" alt="Screenshot 2025-05-14 at 10 10 12 AM"
src="https://github.com/user-attachments/assets/bb4f477f-4ee3-4870-ae93-14ca7f197d55"
/>


After: ~2.2s

<img width="1344" alt="Screenshot 2025-05-14 at 10 11 03 AM"
src="https://github.com/user-attachments/assets/99f97d80-49de-462f-856f-9e7e8662adbc"
/>

We still want to optimize the backwards compatible tool calls model
validator, though I think this might involve breaking changes, so wanted
to separate that into a different PR. This is circled in green.
2025-05-14 10:20:22 -07:00
Sydney Runkle
1523602196 packaging[core]: bump min pydantic version (#31239)
Bumping to a version that's a year old, so seems like a reasonable bump.
2025-05-14 10:01:24 -07:00
ccurme
367566b02f docs: fix notebook (#31233)
This is no longer runnable in CI.
2025-05-14 11:53:38 -04:00
Scott Brenner
29bfbc0ea6 infra: Dependabot configuration to update actions in workflow (#31026)
Noticed a few Actions used in the workflows here are outdated; proposing
a Dependabot configuration to update them. Reference:
https://docs.github.com/en/actions/security-guides/using-githubs-security-features-to-secure-your-use-of-github-actions#keeping-the-actions-in-your-workflows-secure-and-up-to-date

Suggest enabling
https://docs.github.com/en/code-security/dependabot/working-with-dependabot/about-dependabot-on-github-actions-runners#enabling-or-disabling-for-your-repository
as well
2025-05-14 11:40:54 -04:00
Lope Ramos
b8ae2de169 langchain-core[patch]: Incremental record manager deletion should be batched (#31206)
**Description:** Before this commit, if a single batch contained more than
32k rows for sqlite3 >= 3.32, or more than 999 rows for sqlite3 < 3.32,
`record_manager.delete_keys()` would fail because we were building a query
with too many bound variables.

This commit ensures that the delete operation is batched, leveraging
`cleanup_batch_size` as is already done for `full` cleanup.

Added unit tests for incremental mode with different delete batch sizes as
well.
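
A minimal sketch of the batching idea, assuming a helper that slices the keys before calling `record_manager.delete_keys()` (names are illustrative; the actual change lives inside the indexing cleanup logic):

```python
def delete_keys_in_batches(record_manager, keys, cleanup_batch_size: int = 1000) -> None:
    # Delete in slices so each generated SQL statement stays below
    # SQLite's bound-variable limit (999 or ~32k depending on version).
    for start in range(0, len(keys), cleanup_batch_size):
        record_manager.delete_keys(keys[start : start + cleanup_batch_size])
```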
2025-05-14 11:38:21 -04:00
Sydney Runkle
263c215112 perf[core]: remove generations summation from hot loop (#31231)
1. Removes summation of `ChatGenerationChunk` from hot loops in `stream`
and `astream`
2. Removes run id gen from loop as well (minor impact)

Again, benchmarking on processing ~200k chunks (a poem about broccoli).

Before: ~4.2s

Blue circle is all the time spent adding up gen chunks

<img width="1345" alt="Screenshot 2025-05-14 at 7 48 33 AM"
src="https://github.com/user-attachments/assets/08a59d78-134d-4cd3-9d54-214de689df51"
/>

After: ~2.3s

Blue circle is remaining time spent on adding chunks, which can be
minimized in a future PR by optimizing the `merge_content`,
`merge_dicts`, and `merge_lists` utilities.

<img width="1353" alt="Screenshot 2025-05-14 at 7 50 08 AM"
src="https://github.com/user-attachments/assets/df6b3506-929e-4b6d-b198-7c4e992c6d34"
/>
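
Conceptually, the change moves from merging chunks inside the streaming loop to only collecting them there, as in this simplified sketch (plain strings stand in for `ChatGenerationChunk`; this is not the actual langchain-core source):

```python
from typing import Iterable, Iterator

def stream_with_inline_merge(chunks: Iterable[str]) -> Iterator[str]:
    # Before: the running aggregate is rebuilt on every iteration,
    # so the per-chunk cost grows with the size of the accumulated text.
    final = ""
    for chunk in chunks:
        final = final + chunk
        yield chunk

def stream_with_deferred_merge(chunks: Iterable[str]) -> Iterator[str]:
    # After (conceptually): the hot loop only appends; any merging that is
    # still needed happens once, outside the per-chunk path.
    collected = []
    for chunk in chunks:
        collected.append(chunk)
        yield chunk
    _final = "".join(collected)  # single pass at the end
```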
2025-05-14 08:13:05 -07:00
Sydney Runkle
17b799860f perf[core]: remove costly async helpers for non-end event handlers (#31230)
1. Remove `shielded` decorator from non-end event handlers
2. Exit early with a `self.handlers` check instead of doing unnecessary
asyncio work

Using a benchmark that processes ~200k chunks (a poem about broccoli).

Before: ~15s

Circled in blue is unnecessary event handling time. This is addressed by
point 2 above

<img width="1347" alt="Screenshot 2025-05-14 at 7 37 53 AM"
src="https://github.com/user-attachments/assets/675e0fed-8f37-46c0-90b3-bef3cb9a1e86"
/>

After: ~4.2s

The total time is largely reduced by the removal of the `shielded`
decorator, which holds little significance for non-end handlers.

<img width="1348" alt="Screenshot 2025-05-14 at 7 37 22 AM"
src="https://github.com/user-attachments/assets/54be8a3e-5827-4136-a87b-54b0d40fe331"
/>
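
The early-exit idea in point 2 can be sketched as follows (class and method names are hypothetical, not the actual callback-manager code):

```python
import asyncio

class AsyncCallbackManagerSketch:
    def __init__(self, handlers):
        self.handlers = handlers

    async def on_llm_new_token(self, token: str) -> None:
        if not self.handlers:
            # Exit early: no handlers registered, so skip all asyncio work.
            return
        await asyncio.gather(
            *(handler.on_llm_new_token(token) for handler in self.handlers)
        )
```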
2025-05-14 07:42:56 -07:00
ccurme
0b8837a0cc openai: support runtime kwargs in embeddings (#31195) 2025-05-14 09:14:40 -04:00
Rares Vernica
4f41b54bcb docs:Fix Google GenAI Embedding params (#31188)
Extend Google parameters in the embeddings tab to include Google GenAI
(Gemini)

**Description:** Update embeddings tab to include example for Google
GenAI (Gemini)

**Issue:** N/A

**Dependencies:** N/A

**Twitter handle:** N/A



---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-14 08:50:11 -04:00
MedlockM
ce0b1a9428 docs: update how_to docs to reflect new loaders interface and functionalities (#31219)
- **Description:** Updates two notebooks in the how_to documentation to
reflect new loader interfaces and functionalities.
- **Issue:** Some how_to notebooks were still using loader interfaces
from previous versions of LangChain and did not demonstrate the latest
loader functionalities (e.g., extracting images with `ImageBlobParser`,
extracting tables in specific output formats, parsing documents using
Vision-Language Models with `ZeroxPDFLoader`, and using
`CloudBlobLoader` in the `GenericLoader`, etc.).
- **Dependencies:** `py-zerox`
- **Twitter handle:** @MarcMedlock2

---------

Co-authored-by: Marc Medlock <marc.medlock@octo.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-13 17:59:01 -04:00
Michael Li
275e3b6710 docs: replace initialize_agent with create_react_agent in searchapi - replace deprecated load_tools (#31203)
2025-05-13 16:19:18 -04:00
Collier King
e53c10e546 docs: update Cloudflare examples for env references (#31205)
- [ ] **Docs Update**: "langchain-cloudflare: add env var references in
example notebooks"
- We've updated our Cloudflare integration example notebooks with
examples showing environmental variables to initialize the class
instances.
2025-05-13 16:18:27 -04:00
Michael Li
395f057243 docs: replace deprecated load_tools in google_finance.ipynb (#31220)
2025-05-13 14:56:11 -04:00
Michael Li
a9ee625f32 docs: replace deprecated load_tools in searx.mdx (#31218)
2025-05-13 14:53:44 -04:00
Michael Li
544648eb71 docs: replace deprecated load_tools in wolfram_alpha.mdx (#31217)
2025-05-13 14:53:29 -04:00
Michael Li
40be8d1d90 docs: replace deprecated load_tools in stackexchange.mdx (#31216)
2025-05-13 14:53:15 -04:00
Michael Li
f034bd7933 docs: replace deprecated load_tools in serpapi.mdx (#31215)
2025-05-13 14:53:01 -04:00
Michael Li
17a04dd598 docs: replace deprecated load_tools in google.mdx (#31214)
2025-05-13 14:52:46 -04:00
Michael Li
a44e707811 docs: replace deprecated load_tools in google_serper.mdx (#31213)
2025-05-13 14:52:30 -04:00
Michael Li
3520520a48 docs: replace deprecated load_tools in golden.mdx (#31212)
2025-05-13 14:52:15 -04:00
Michael Li
09d74504e3 docs: replace deprecated load_tools in dataforseo.mdx (#31211)
2025-05-13 14:51:40 -04:00
Shorthills AI
b2f0fbfea5 Update tools.mdx (#124) (#31207)
Changed `toolkit=ExampleTookit` to `toolkit = ExampleToolkit(...)` in the
tools.mdx file.

Co-authored-by: SiddharthAnandShorthillsAI <siddharth.anand@shorthills.ai>
2025-05-13 11:01:54 -04:00
Michael Li
636a35fc2d docs: replace initialize_agent with create_react_agent in llmonitor.md (#31200)

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-12 22:05:11 +00:00
Michael Li
7b9feb60cc docs: replace initialize_agent with create_react_agent in openweathermap - replace deprecated load_tools (#31202)
…map.ipynb

Update openweathermap markdown file for tools

2025-05-12 15:12:02 -04:00
Michael Li
87add0809f docs: replace initialize_agent with create_react_agent in graphql.ipynb - replace deprecated load_tools (#31201)
Replace the deprecated load_tools

2025-05-12 15:11:39 -04:00
ccurme
868cfc4a8f openai: ignore function_calls if tool_calls are present (#31198)
Some providers include (legacy) function calls in `additional_kwargs` in
addition to tool calls. We currently unpack both function calls and tool
calls if present, but OpenAI will raise 400 in this case.

This can come up if providers are mixed in a tool-calling loop. Example:
```python
from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool


@tool
def get_weather(location: str) -> str:
    """Get weather at a location."""
    return "It's sunny."



gemini = init_chat_model("google_genai:gemini-2.0-flash-001").bind_tools([get_weather])
openai = init_chat_model("openai:gpt-4.1-mini").bind_tools([get_weather])

input_message = HumanMessage("What's the weather in Boston?")
tool_call_message = gemini.invoke([input_message])

assert len(tool_call_message.tool_calls) == 1
tool_call = tool_call_message.tool_calls[0]
tool_message = get_weather.invoke(tool_call)

response = openai.invoke(  # currently raises 400 / BadRequestError
    [input_message, tool_call_message, tool_message]
)
```

Here we ignore function calls if tool calls are present.
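
Conceptually, the fix filters the outgoing payload along these lines (a simplified sketch, not the exact langchain-openai implementation):

```python
def drop_legacy_function_call(message: dict) -> dict:
    """If structured tool_calls are present, drop the legacy function_call field."""
    additional = dict(message.get("additional_kwargs", {}))
    if message.get("tool_calls") and "function_call" in additional:
        del additional["function_call"]
    return {**message, "additional_kwargs": additional}
```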
2025-05-12 13:50:56 -04:00
Christophe Bornet
83d006190d core: Fix some private member accesses (#30912)
See https://github.com/langchain-ai/langchain/pull/30666

---------

Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
2025-05-12 17:42:26 +00:00
CtrlMj
1e56c66f86 core: Fix issue 31035 alias fields in base tool langchain core (#31112)
**Description**: The 'inspect' package in Python skips over the aliases
set in the schema of a Pydantic model. This is a workaround to include the
aliases from the original input.
**Issue**: #31035


Cc: @ccurme @eyurtsev

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-12 11:04:13 -04:00
mateencog
92af7b0933 infra: Suppress error in make api_docs_clean if index.md is missing (#31129)
- **Description:** Added the -f flag to rm when calling ```make
api_docs_clean``` to suppress the error raised by make if index.md doesn't exist.
- **Dependencies:** none

On calling ```make api_docs_clean```

Behavior without this PR:
```
find ./docs/api_reference -name '*_api_reference.rst' -delete
git clean -fdX ./docs/api_reference
rm docs/api_reference/index.md
rm: cannot remove 'docs/api_reference/index.md': No such file or directory
make: *** [Makefile:51: api_docs_clean] Error 1
```
After this PR:

```
find ./docs/api_reference -name '*_api_reference.rst' -delete
git clean -fdX ./docs/api_reference
rm -f docs/api_reference/index.md
```
2025-05-11 17:26:49 -04:00
meirk-brd
e6147ce5d2 docs: Add Brightdata integration documentation (#31114)

- **Description:** Integrated the Bright Data package to enable
Langchain users to seamlessly incorporate Bright Data into their agents.
 - **Dependencies:** None
- **LinkedIn handle**:[Bright
Data](https://www.linkedin.com/company/bright-data)


---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-11 16:07:21 +00:00
Michael Li
0d59fe9789 docs: replace initialize_agent with create_react_agent in agent_vectorstore.ipynb (#31183)

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-11 11:19:18 -04:00
ccurme
ff9183fd3c docs: add Gel integration (#31186)
Continued from https://github.com/langchain-ai/langchain/pull/31050

---------

Co-authored-by: deepbuzin <contactbuzin@gmail.com>
2025-05-11 10:17:18 -04:00
Michael Li
65fbbb0249 docs: replace initialize_agent with create_react_agent in searchapi.ipynb (#31184)
2025-05-11 09:42:25 -04:00
ccurme
77d3f04e0a docs: add Aerospike to package registry (#31185)
Missed as part of https://github.com/langchain-ai/langchain/pull/31156
2025-05-11 09:33:58 -04:00
dwelch-spike
0dee089ba7 docs: document the move of the aerospike vector store integration to langchain-aerospike vec-595 (#31156)
**Description:** The Aerospike Vector Search vector store integration
has moved out of langchain-community and into its own repository,
https://github.com/aerospike/langchain-aerospike. This PR updates the
langchain documentation to reference it.


2025-05-11 09:30:29 -04:00
Asif Mehmood
2ec74fea44 docs: update DoctranPropertyExtractor import path and fix typo (#31177)
**Description:**
Updated the import path for `DoctranPropertyExtractor` from
`langchain_community.document_loaders` to
`langchain_community.document_transformers` in multiple locations to
reflect recent package structure changes. Also corrected a minor typo in
the word "variable".

**Issue:**
N/A

**Dependencies:**
N/A

**LinkedIn handle:** For shout out if announced [Asif
Mehmood](https://www.linkedin.com/in/asifmehmood1997/).
2025-05-10 15:43:40 -04:00
Sumin Shin
683da2c9e9 text-splitters: Fix regex separator merge bug in CharacterTextSplitter (#31137)
**Description:**
Fix the merge logic in `CharacterTextSplitter.split_text` so that when
using a regex lookahead separator (`is_separator_regex=True`) with
`keep_separator=False`, the raw pattern is not re-inserted between
chunks.

**Issue:**
Fixes #31136 

**Dependencies:**
None

**Twitter handle:**
None

Since this is my first open-source PR, please feel free to point out any
mistakes, and I'll be eager to make corrections.
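
For context, a usage sketch of the configuration this fix targets (the text and sizes are illustrative):

```python
from langchain_text_splitters import CharacterTextSplitter

text = "## Intro\nfirst section\n## Usage\nsecond section\n## Notes\nthird section"

splitter = CharacterTextSplitter(
    separator=r"(?=\n## )",   # regex lookahead separator
    is_separator_regex=True,
    keep_separator=False,
    chunk_size=40,
    chunk_overlap=0,
)
print(splitter.split_text(text))
# Before the fix, the raw pattern "(?=\n## )" could be re-inserted between merged chunks.
```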
2025-05-10 15:42:03 -04:00
Michael Li
0ef4ac75b7 docs: remove duplicated and inaccurate mulvus doc (part of langchain-ai#31104) (#31154) 2025-05-10 19:38:11 +00:00
ccurme
23ec06b481 docs: showcase gemini-2.0-flash-preview-image-generation (#31176) 2025-05-09 11:17:15 -04:00
ccurme
e9e597be8e docs: update sort order in integrations table (#31171) 2025-05-08 20:44:21 +00:00
ccurme
0ba8697286 infra: add to vercel overrides (#31170)
Incompatibility between langchain-redis and langchain-ai21 `tenacity`
dep
2025-05-08 20:36:43 +00:00
ccurme
9aac8923a3 docs: add web search to anthropic docs (#31169) 2025-05-08 16:20:11 -04:00
Victor Hiairrassary
efc52e18e9 docs: fix typing in how_to/custom_tools.ipynb (#31164)
Fix typing in how_to/custom_tools.ipynb
2025-05-08 13:51:42 -04:00
ccurme
2d202f9762 anthropic[patch]: split test into two (#31167) 2025-05-08 09:23:36 -04:00
ccurme
d4555ac924 anthropic: release 0.3.13 (#31162) 2025-05-08 03:13:15 +00:00
ccurme
e34f9fd6f7 anthropic: update streaming usage metadata (#31158)
Anthropic updated how they report token counts during streaming today.
See changes to `MessageDeltaUsage` in [this
commit](2da00f26c5 (diff-1a396eba0cd9cd8952dcdb58049d3b13f6b7768ead1411888d66e28211f7bfc5)).

It's clean and simple to grab these fields from the final
`message_delta` event. However, some of them are typed as Optional, and
language
[here](e42451ab3f/src/anthropic/lib/streaming/_messages.py (L462))
suggests they may not always be present. So here we take the required
field from the `message_delta` event as we were doing previously, and
ignore the rest.
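
In other words, usage extraction stays roughly like this sketch (attribute names are based on the description above, not the exact langchain-anthropic source):

```python
def usage_from_message_delta(event) -> dict:
    usage = event.usage
    # output_tokens is the required field we already relied on; Optional fields
    # that may be missing depending on the SDK version are deliberately ignored.
    return {"output_tokens": usage.output_tokens}
```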
2025-05-07 23:09:56 -04:00
Tanushree
6c3901f9f9 Change banner (#31159)
Changing the banner to lead to the careers page instead of Interrupt

---------

Co-authored-by: Brace Sproul <braceasproul@gmail.com>
2025-05-07 18:07:10 -07:00
ccurme
682f338c17 anthropic[patch]: support web search (#31157) 2025-05-07 18:04:06 -04:00
ccurme
d7e016c5fc huggingface: release 0.2 (#31153) 2025-05-07 15:33:07 -04:00
ccurme
4b11cbeb47 huggingface[patch]: update lockfile (#31152) 2025-05-07 15:17:33 -04:00
ccurme
b5b90b5929 anthropic[patch]: be robust to null fields when translating usage metadata (#31151) 2025-05-07 18:30:21 +00:00
ccurme
f70b263ff3 core: release 0.3.59 (#31150) 2025-05-07 17:36:59 +00:00
ccurme
bb69d4c42e docs: specify js support for tavily (#31149) 2025-05-07 11:30:04 -04:00
zhurou603
1df3ee91e7 partners: (langchain-openai) total_tokens should not add 'Nonetype' t… (#31146)

# PR Description

## Description
Fixed an issue in `langchain-openai` where `total_tokens` was
incorrectly adding `None` to an integer, causing a TypeError. The fix
ensures proper type checking before adding token counts.

## Issue
Fixes the TypeError traceback shown in the image where `'NoneType'`
cannot be added to an integer.

## Dependencies
None

## Twitter handle
None

![image](https://github.com/user-attachments/assets/9683a795-a003-455a-ada9-fe277245e2b2)
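
The guard amounts to something like this sketch (illustrative only, not the exact patch):

```python
def add_token_counts(total_tokens, new_tokens):
    # Treat a missing count as 0 so we never add None to an int.
    return (total_tokens or 0) + (new_tokens or 0)
```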

Co-authored-by: qiulijie <qiulijie@yuaiweiwu.com>
2025-05-07 11:09:50 -04:00
Collier King
19041dcc95 docs: update langchain-cloudflare repo/path on packages.yaml (#31138)
**Library Repo Path Update **: "langchain-cloudflare"

We recently changed our `langchain-cloudflare` repo to allow for future
libraries.
Created a `libs` folder to hold `langchain-cloudflare` python package.


https://github.com/cloudflare/langchain-cloudflare/tree/main/libs/langchain-cloudflare
 
On `langchain`, updating `packages.yaml` to point to new
`libs/langchain-cloudflare` library folder.
2025-05-07 11:01:25 -04:00
Simonas Jakubonis
3cba22d8d7 docs: Pinecone Rerank example notebook (#31147)
Created an example notebook showing how to use the Pinecone reranking service.
cc @jamescalam
2025-05-07 11:00:42 -04:00
Jacob Lee
66d1ed6099 fix(core): Permit OpenAI style blocks to be passed into convert_to_openai_messages (#31140)
Should effectively be a noop, just shouldn't throw

CC @madams0013
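
A quick usage sketch of the intended behavior, assuming `convert_to_openai_messages` from langchain-core (the content blocks are illustrative OpenAI-style blocks):

```python
from langchain_core.messages import HumanMessage, convert_to_openai_messages

message = HumanMessage(
    content=[
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ]
)
# After the fix this should pass the blocks through instead of raising.
print(convert_to_openai_messages([message]))
```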

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
2025-05-07 10:57:37 -04:00
Tushar Nitave
a15034d8d1 docs: Fixed grammar for chat prompt composition (#31148)
This PR fixes a grammar issue in the sentence:

"A chat prompt is made up a of a list of messages..." → "A chat prompt
is made up of a list of messages. "
2025-05-07 10:51:34 -04:00
Michael Li
57c81dc3e3 docs: replace initialize_agent with create_react_agent in graphql.ipynb (#31133)

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-06 16:09:52 -04:00
pulvedu
52732a4d13 Update docs (#31135)
Updating tavily docs to indicate JS support

---------

Co-authored-by: pulvedu <dustin@tavily.com>
2025-05-06 16:09:39 -04:00
Simonas Jakubonis
5dde64583e docs: Updated pinecone.mdx in the integration providers (#31123)
Updated pinecone.mdx in the integration providers

Added short description and examples for SparseVector store and
SparseEmbeddings
2025-05-06 12:58:32 -04:00
Tomaz Bratanic
6b6750967a Docs: Change to async llm graph transformer (#31126) 2025-05-06 12:53:57 -04:00
ccurme
703fce7972 docs: document that Anthropic supports boolean parallel_tool_calls param in guide (#31122) 2025-05-05 20:25:27 -04:00
唐小鸭
50fa524a6d partners: (langchain-deepseek) fix deepseek-r1 always returns an empty reasoning_content when reasoning (#31065)
## Description
deepseek-r1 always returns an empty string `reasoning_content` to the
first chunk when thinking, and sets `reasoning_content` to None when
thinking is over, to determine when to switch to normal output.

Therefore, whether the reasoning_content field exists should be judged
as None.

## Demo
deepseek-r1 reasoning output: 

```
{'delta': {'content': None, 'function_call': None, 'refusal': None, 'role': 'assistant', 'tool_calls': None, 'reasoning_content': ''}, 'finish_reason': None, 'index': 0, 'logprobs': None}
{'delta': {'content': None, 'function_call': None, 'refusal': None, 'role': None, 'tool_calls': None, 'reasoning_content': '好的'}, 'finish_reason': None, 'index': 0, 'logprobs': None}
{'delta': {'content': None, 'function_call': None, 'refusal': None, 'role': None, 'tool_calls': None, 'reasoning_content': ','}, 'finish_reason': None, 'index': 0, 'logprobs': None}
{'delta': {'content': None, 'function_call': None, 'refusal': None, 'role': None, 'tool_calls': None, 'reasoning_content': '用户'}, 'finish_reason': None, 'index': 0, 'logprobs': None}
...
```

deepseek-r1 first normal output
```
...
{'delta': {'content': ' main', 'function_call': None, 'refusal': None, 'role': None, 'tool_calls': None, 'reasoning_content': None}, 'finish_reason': None, 'index': 0, 'logprobs': None}
{'delta': {'content': '\n\nimport', 'function_call': None, 'refusal': None, 'role': None, 'tool_calls': None, 'reasoning_content': None}, 'finish_reason': None, 'index': 0, 'logprobs': None}
...
```
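
In other words, the consumer-side check looks roughly like this (an illustrative sketch, not the exact langchain-deepseek code):

```python
def route_delta(delta: dict) -> tuple[str, str]:
    """Classify a streamed delta as reasoning or normal content."""
    reasoning = delta.get("reasoning_content")
    if reasoning is not None:  # an empty string still means "reasoning in progress"
        return "reasoning", reasoning
    return "content", delta.get("content") or ""
```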

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
2025-05-05 22:31:58 +00:00
Michael Li
c0b69808a8 docs: replace initialize_agent with create_react_agent in openweathermap.ipynb (#31115)

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-05 18:08:09 -04:00
ccurme
fce8caca16 docs: minor fix in Pinecone (#31110) 2025-05-03 20:16:27 +00:00
Simonas Jakubonis
b8d0403671 docs: updated pinecone example notebook (#30993)
- **Description:** Update Pinecone notebook example
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Twitter handle:** N/A


- [ x ] **Add tests and docs**: Just notebook updates


2025-05-03 16:02:21 -04:00
Michael Li
1204fb8010 docs: replace initialize_agent with create_react_agent in yahoo_finance_news.ipynb (#31108)

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-03 15:45:52 -04:00
Adeel Ehsan
1e00116ae7 docs: add docs for vectara tools (#30958)
- **Docs for Vectara Tools**: "langchain-vectara"
2025-05-03 15:39:16 -04:00
Haris Colic
3e25a93136 Docs: replace initialize agent with create react agent for google tools (#31043)
- **Description:** The deprecated initialize_agent functionality is
replaced with create_react_agent for the Google tools (see the sketch after
this list). Also noticed a potential issue with the non-existent
"google-drive-search" tool, which was used in the old `google-drive.ipynb`.
If this should be an available tool by default, an issue should be opened to
modify langchain-community's `load_tools` accordingly.
- **Issue:**  #29277
- **Dependencies:** No added dependencies
- **Twitter handle:** No Twitter account
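
The replacement pattern looks roughly like this hedged sketch (the tool, model string, and prompt are illustrative placeholders, not taken from the notebook):

```python
from langchain_core.tools import tool
from langgraph.prebuilt import create_react_agent

@tool
def search_drive(query: str) -> str:
    """Hypothetical stand-in for a Google Drive search tool."""
    return f"No results found for {query!r}"

agent = create_react_agent("openai:gpt-4.1-mini", [search_drive])
result = agent.invoke({"messages": [{"role": "user", "content": "Find my Q3 notes"}]})
```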
2025-05-03 15:20:07 -04:00
Stefano Lottini
325f729a92 docs: improvements to Astra DB pages, especially modernize Vector DB example notebook (#30961)
This PR brings several improvements and modernizations to the
documentation around the Astra DB partner package.

- language alignment for better matching with the terms used in the
Astra DB docs
- updated several links to pages on said documentation
- for the `AstraDBVectorStore`, added mentions of the new features in
the overall `astra.mdx`
- for the vector store, rewrote/upgraded most of the usage example
notebook for a more straightforward experience that highlights the
main usage patterns (including new ones such as the newly introduced
"autodetect" feature)

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
2025-05-03 14:26:52 -04:00
Asif Mehmood
00ac49dd3e Replace deprecated .dict() with .model_dump() for Pydantic v2 compatibility (#31107)
**What does this PR do?**
This PR replaces deprecated usages of ```.dict()``` with
```.model_dump()``` to ensure compatibility with Pydantic v2 and prepare
for v3, addressing the deprecation warning
```PydanticDeprecatedSince20``` as required in [Issue#
31103](https://github.com/langchain-ai/langchain/issues/31103).

**Changes made:**
* Replaced ```.dict()``` with ```.model_dump()``` in multiple locations
* Ensured consistency with Pydantic v2 migration guidelines
* Verified compatibility across affected modules

**Notes**
* This is a code maintenance and compatibility update
* Tested locally with Pydantic v2.11
* No functional logic changes; only internal method replacements to
prevent deprecation issues
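
A minimal before/after sketch of the replacement described above:

```python
from pydantic import BaseModel

class Item(BaseModel):
    name: str
    price: float

item = Item(name="widget", price=1.5)

data_old = item.dict()        # deprecated in Pydantic v2 (PydanticDeprecatedSince20)
data_new = item.model_dump()  # v2 replacement with the same result here
assert data_old == data_new
```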
2025-05-03 13:40:54 -04:00
ccurme
6268ae8db0 langchain: release 0.3.25 (#31101) 2025-05-02 17:42:32 +00:00
ccurme
77ecf47f6d openai: release 0.3.16 (#31100) 2025-05-02 13:14:46 -04:00
291 changed files with 12760 additions and 12560 deletions

11
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,11 @@
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
# and
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"

View File

@@ -37,7 +37,6 @@ IGNORED_PARTNERS = [
]
PY_312_MAX_PACKAGES = [
"libs/partners/voyageai",
"libs/partners/chroma", # https://github.com/chroma-core/chroma/issues/4382
]

View File

@@ -67,7 +67,6 @@ jobs:
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}

View File

@@ -322,7 +322,6 @@ jobs:
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
@@ -341,7 +340,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
partner: [openai, anthropic]
partner: [anthropic]
fail-fast: false # Continue testing other partners if one fails
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

View File

@@ -12,7 +12,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Use Node.js 18.x
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: 18.x
cache: "yarn"

View File

@@ -29,7 +29,7 @@ jobs:
with:
python-version: '3.11'
- id: files
uses: Ana06/get-changed-files@v2.2.0
uses: Ana06/get-changed-files@v2.3.0
- id: set-matrix
run: |
python -m pip install packaging requests

View File

@@ -24,7 +24,7 @@ jobs:
with:
python-version: '3.10'
- id: files
uses: Ana06/get-changed-files@v2.2.0
uses: Ana06/get-changed-files@v2.3.0
with:
filter: |
*.ipynb

View File

@@ -21,12 +21,12 @@ jobs:
# We have to use 3.12, 3.13 is not yet supported
- name: Install uv
uses: astral-sh/setup-uv@v5
uses: astral-sh/setup-uv@v6
with:
python-version: "3.12"
# Using this action is still necessary for CodSpeed to work
- uses: actions/setup-python@v3
- uses: actions/setup-python@v5
with:
python-version: "3.12"

View File

@@ -103,12 +103,6 @@ repos:
entry: make -C libs/partners/qdrant format
files: ^libs/partners/qdrant/
pass_filenames: false
- id: voyageai
name: format partners/voyageai
language: system
entry: make -C libs/partners/voyageai format
files: ^libs/partners/voyageai/
pass_filenames: false
- id: root
name: format docs, cookbook
language: system

View File

@@ -48,7 +48,7 @@ api_docs_quick_preview:
api_docs_clean:
find ./docs/api_reference -name '*_api_reference.rst' -delete
git clean -fdX ./docs/api_reference
rm docs/api_reference/index.md
rm -f docs/api_reference/index.md
## api_docs_linkcheck: Run linkchecker on the API Reference documentation.

View File

@@ -22,7 +22,19 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 1,
"id": "e8d63d14-138d-4aa5-a741-7fd3537d00aa",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2e87c10a",
"metadata": {},
"outputs": [],
@@ -37,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 3,
"id": "0b7b772b",
"metadata": {},
"outputs": [],
@@ -54,19 +66,10 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 4,
"id": "f2675861",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running Chroma using direct local API.\n",
"Using DuckDB in-memory for database. Data will be transient.\n"
]
}
],
"outputs": [],
"source": [
"from langchain_community.document_loaders import TextLoader\n",
"\n",
@@ -81,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "bc5403d4",
"metadata": {},
"outputs": [],
@@ -93,17 +96,25 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "1431cded",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
]
}
],
"source": [
"from langchain_community.document_loaders import WebBaseLoader"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "915d3ff3",
"metadata": {},
"outputs": [],
@@ -113,16 +124,20 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "96a2edf8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"Running Chroma using direct local API.\n",
"Using DuckDB in-memory for database. Data will be transient.\n"
"Created a chunk of size 2122, which is longer than the specified 1000\n",
"Created a chunk of size 3187, which is longer than the specified 1000\n",
"Created a chunk of size 1017, which is longer than the specified 1000\n",
"Created a chunk of size 1049, which is longer than the specified 1000\n",
"Created a chunk of size 1256, which is longer than the specified 1000\n",
"Created a chunk of size 2321, which is longer than the specified 1000\n"
]
}
],
@@ -135,14 +150,6 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71ecef90",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "c0a6c031",
@@ -153,31 +160,30 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 9,
"id": "eb142786",
"metadata": {},
"outputs": [],
"source": [
"# Import things that are needed generically\n",
"from langchain.agents import AgentType, Tool, initialize_agent\n",
"from langchain_openai import OpenAI"
"from langchain.agents import Tool"
]
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 10,
"id": "850bc4e9",
"metadata": {},
"outputs": [],
"source": [
"tools = [\n",
" Tool(\n",
" name=\"State of Union QA System\",\n",
" name=\"state_of_union_qa_system\",\n",
" func=state_of_union.run,\n",
" description=\"useful for when you need to answer questions about the most recent state of the union address. Input should be a fully formed question.\",\n",
" ),\n",
" Tool(\n",
" name=\"Ruff QA System\",\n",
" name=\"ruff_qa_system\",\n",
" func=ruff.run,\n",
" description=\"useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question.\",\n",
" ),\n",
@@ -186,94 +192,116 @@
},
{
"cell_type": "code",
"execution_count": 45,
"id": "fc47f230",
"execution_count": 11,
"id": "70c461d8-aaca-4f2a-9a93-bf35841cc615",
"metadata": {},
"outputs": [],
"source": [
"# Construct the agent. We will use the default agent type here.\n",
"# See documentation for a full list of options.\n",
"agent = initialize_agent(\n",
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
")"
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "10ca2db8",
"execution_count": 12,
"id": "a6d2b911-3044-4430-a35b-75832bb45334",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What did biden say about ketanji brown jackson in the state of the union address?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" state_of_union_qa_system (call_26QlRdsptjEJJZjFsAUjEbaH)\n",
" Call ID: call_26QlRdsptjEJJZjFsAUjEbaH\n",
" Args:\n",
" __arg1: What did Biden say about Ketanji Brown Jackson in the state of the union address?\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: state_of_union_qa_system\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to find out what Biden said about Ketanji Brown Jackson in the State of the Union address.\n",
"Action: State of Union QA System\n",
"Action Input: What did Biden say about Ketanji Brown Jackson in the State of the Union address?\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n",
" Biden said that he nominated Ketanji Brown Jackson for the United States Supreme Court and praised her as one of the nation's top legal minds who will continue Justice Breyer's legacy of excellence.\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
"In the State of the Union address, Biden said that he nominated Ketanji Brown Jackson for the United States Supreme Court and praised her as one of the nation's top legal minds who will continue Justice Breyer's legacy of excellence.\n"
]
},
{
"data": {
"text/plain": [
"\"Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\n",
" \"What did biden say about ketanji brown jackson in the state of the union address?\"\n",
")"
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"What did biden say about ketanji brown jackson in the state of the union address?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "4e91b811",
"execution_count": 13,
"id": "e836b4cd-abf7-49eb-be0e-b9ad501213f3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"Why use ruff over flake8?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" ruff_qa_system (call_KqDoWeO9bo9OAXdxOsCb6msC)\n",
" Call ID: call_KqDoWeO9bo9OAXdxOsCb6msC\n",
" Args:\n",
" __arg1: Why use ruff over flake8?\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: ruff_qa_system\n",
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to find out the advantages of using ruff over flake8\n",
"Action: Ruff QA System\n",
"Action Input: What are the advantages of using ruff over flake8?\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3m Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.\u001b[0m\n",
"There are a few reasons why someone might choose to use Ruff over Flake8:\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
"1. Larger rule set: Ruff implements over 800 rules, while Flake8 only implements around 200. This means that Ruff can catch more potential issues in your code.\n",
"\n",
"2. Better compatibility with other tools: Ruff is designed to work well with other tools like Black, isort, and type checkers like Mypy. This means that you can use Ruff alongside these tools to get more comprehensive feedback on your code.\n",
"\n",
"3. Automatic fixing of lint violations: Unlike Flake8, Ruff is capable of automatically fixing its own lint violations. This can save you time and effort when fixing issues in your code.\n",
"\n",
"4. Native implementation of popular Flake8 plugins: Ruff re-implements some of the most popular Flake8 plugins natively, which means you don't have to install and configure multiple plugins to get the same functionality.\n",
"\n",
"Overall, Ruff offers a more comprehensive and user-friendly experience compared to Flake8, making it a popular choice for many developers.\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"You might choose to use Ruff over Flake8 for several reasons:\n",
"\n",
"1. Ruff has a much larger rule set, implementing over 800 rules compared to Flake8's roughly 200, so it can catch more potential issues.\n",
"2. Ruff is designed to work better with other tools like Black, isort, and type checkers like Mypy, providing more comprehensive code feedback.\n",
"3. Ruff can automatically fix its own lint violations, which Flake8 cannot, saving time and effort.\n",
"4. Ruff natively implements some popular Flake8 plugins, so you don't need to install and configure multiple plugins separately.\n",
"\n",
"Overall, Ruff offers a more comprehensive and user-friendly experience compared to Flake8.\n"
]
},
{
"data": {
"text/plain": [
"'Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.'"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\"Why use ruff over flake8?\")"
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"Why use ruff over flake8?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
@@ -296,20 +324,20 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 14,
"id": "f59b377e",
"metadata": {},
"outputs": [],
"source": [
"tools = [\n",
" Tool(\n",
" name=\"State of Union QA System\",\n",
" name=\"state_of_union_qa_system\",\n",
" func=state_of_union.run,\n",
" description=\"useful for when you need to answer questions about the most recent state of the union address. Input should be a fully formed question.\",\n",
" return_direct=True,\n",
" ),\n",
" Tool(\n",
" name=\"Ruff QA System\",\n",
" name=\"ruff_qa_system\",\n",
" func=ruff.run,\n",
" description=\"useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question.\",\n",
" return_direct=True,\n",
@@ -319,90 +347,92 @@
},
{
"cell_type": "code",
"execution_count": 49,
"id": "8615707a",
"execution_count": 15,
"id": "06f69c0f-c83d-4b7f-a1c8-7614aced3bae",
"metadata": {},
"outputs": [],
"source": [
"agent = initialize_agent(\n",
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
")"
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "36e718a9",
"execution_count": 16,
"id": "a6b38c12-ac25-43c0-b9c2-2b1985ab4825",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What did biden say about ketanji brown jackson in the state of the union address?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" state_of_union_qa_system (call_yjxh11OnZiauoyTAn9npWdxj)\n",
" Call ID: call_yjxh11OnZiauoyTAn9npWdxj\n",
" Args:\n",
" __arg1: What did Biden say about Ketanji Brown Jackson in the state of the union address?\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: state_of_union_qa_system\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to find out what Biden said about Ketanji Brown Jackson in the State of the Union address.\n",
"Action: State of Union QA System\n",
"Action Input: What did Biden say about Ketanji Brown Jackson in the State of the Union address?\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
" Biden said that he nominated Ketanji Brown Jackson for the United States Supreme Court and praised her as one of the nation's top legal minds who will continue Justice Breyer's legacy of excellence.\n"
]
},
{
"data": {
"text/plain": [
"\" Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\n",
" \"What did biden say about ketanji brown jackson in the state of the union address?\"\n",
")"
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"What did biden say about ketanji brown jackson in the state of the union address?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "edfd0a1a",
"execution_count": 17,
"id": "88f08d86-7972-4148-8128-3ac8898ad68a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"Why use ruff over flake8?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" ruff_qa_system (call_GiWWfwF6wbbRFQrHlHbhRtGW)\n",
" Call ID: call_GiWWfwF6wbbRFQrHlHbhRtGW\n",
" Args:\n",
" __arg1: What are the advantages of using ruff over flake8 for Python linting?\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: ruff_qa_system\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to find out the advantages of using ruff over flake8\n",
"Action: Ruff QA System\n",
"Action Input: What are the advantages of using ruff over flake8?\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3m Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
" Ruff has a larger rule set, supports automatic fixing of lint violations, and does not require the installation of additional plugins. It also has better compatibility with Black and can be used alongside a type checker for more comprehensive code analysis.\n"
]
},
{
"data": {
"text/plain": [
"' Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.'"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\"Why use ruff over flake8?\")"
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"Why use ruff over flake8?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
@@ -417,19 +447,19 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 18,
"id": "d397a233",
"metadata": {},
"outputs": [],
"source": [
"tools = [\n",
" Tool(\n",
" name=\"State of Union QA System\",\n",
" name=\"state_of_union_qa_system\",\n",
" func=state_of_union.run,\n",
" description=\"useful for when you need to answer questions about the most recent state of the union address. Input should be a fully formed question, not referencing any obscure pronouns from the conversation before.\",\n",
" ),\n",
" Tool(\n",
" name=\"Ruff QA System\",\n",
" name=\"ruff_qa_system\",\n",
" func=ruff.run,\n",
" description=\"useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question, not referencing any obscure pronouns from the conversation before.\",\n",
" ),\n",
@@ -438,60 +468,60 @@
},
{
"cell_type": "code",
"execution_count": 58,
"id": "06157240",
"execution_count": 19,
"id": "41743f29-150d-40ba-aa8e-3a63c32216aa",
"metadata": {},
"outputs": [],
"source": [
"# Construct the agent. We will use the default agent type here.\n",
"# See documentation for a full list of options.\n",
"agent = initialize_agent(\n",
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
")"
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "b492b520",
"execution_count": 20,
"id": "e20e81dd-284a-4d07-9160-63a84b65cba8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What tool does ruff use to run over Jupyter Notebooks? Did the president mention that tool in the state of the union?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" ruff_qa_system (call_VOnxiOEehauQyVOTjDJkR5L2)\n",
" Call ID: call_VOnxiOEehauQyVOTjDJkR5L2\n",
" Args:\n",
" __arg1: What tool does ruff use to run over Jupyter Notebooks?\n",
" state_of_union_qa_system (call_AbSsXAxwe4JtCRhga926SxOZ)\n",
" Call ID: call_AbSsXAxwe4JtCRhga926SxOZ\n",
" Args:\n",
" __arg1: Did the president mention the tool that ruff uses to run over Jupyter Notebooks in the state of the union?\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: state_of_union_qa_system\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to find out what tool ruff uses to run over Jupyter Notebooks, and if the president mentioned it in the state of the union.\n",
"Action: Ruff QA System\n",
"Action Input: What tool does ruff use to run over Jupyter Notebooks?\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3m Ruff is integrated into nbQA, a tool for running linters and code formatters over Jupyter Notebooks. After installing ruff and nbqa, you can run Ruff over a notebook like so: > nbqa ruff Untitled.html\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now need to find out if the president mentioned this tool in the state of the union.\n",
"Action: State of Union QA System\n",
"Action Input: Did the president mention nbQA in the state of the union?\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m No, the president did not mention nbQA in the state of the union.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
"Final Answer: No, the president did not mention nbQA in the state of the union.\u001b[0m\n",
" No, the president did not mention the tool that ruff uses to run over Jupyter Notebooks in the state of the union.\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
"Ruff does not support source.organizeImports and source.fixAll code actions in Jupyter Notebooks. Additionally, the president did not mention the tool that ruff uses to run over Jupyter Notebooks in the state of the union.\n"
]
},
{
"data": {
"text/plain": [
"'No, the president did not mention nbQA in the state of the union.'"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\n",
" \"What tool does ruff use to run over Jupyter Notebooks? Did the president mention that tool in the state of the union?\"\n",
")"
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"What tool does ruff use to run over Jupyter Notebooks? Did the president mention that tool in the state of the union?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
@@ -519,7 +549,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.12.4"
}
},
"nbformat": 4,

View File

@@ -663,6 +663,7 @@ def main(dirs: Optional[list] = None) -> None:
dir_
for dir_ in os.listdir(ROOT_DIR / "libs")
if dir_ not in ("cli", "partners", "packages.yml")
and "pyproject.toml" in os.listdir(ROOT_DIR / "libs" / dir_)
]
dirs += [
dir_

View File

@@ -48,7 +48,7 @@ From the opposite direction, scientists use `LangChain` in research and referenc
| `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022&#8209;05&#8209;25 | `API:` [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022&#8209;03&#8209;15 | `Docs:` [docs/tutorials/sql_qa](https://python.langchain.com/docs/tutorials/sql_qa), `API:` [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022&#8209;02&#8209;01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
| `2112.01488v3` [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](http://arxiv.org/abs/2112.01488v3) | Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al. | 2021&#8209;12&#8209;02 | `Docs:` [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/docs/integrations/providers/dspy)
| `2112.01488v3` [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](http://arxiv.org/abs/2112.01488v3) | Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al. | 2021&#8209;12&#8209;02 | `Docs:` [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts)
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021&#8209;02&#8209;26 | `API:` [langchain_experimental.open_clip](https://python.langchain.com/api_reference/experimental/open_clip.html)
| `2005.14165v4` [Language Models are Few-Shot Learners](http://arxiv.org/abs/2005.14165v4) | Tom B. Brown, Benjamin Mann, Nick Ryder, et al. | 2020&#8209;05&#8209;28 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
| `2005.11401v4` [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](http://arxiv.org/abs/2005.11401v4) | Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al. | 2020&#8209;05&#8209;22 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
@@ -970,7 +970,7 @@ reducing degenerate repetitions.
- **arXiv id:** [2112.01488v3](http://arxiv.org/abs/2112.01488v3) **Published Date:** 2021-12-02
- **LangChain:**
- **Documentation:** [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/docs/integrations/providers/dspy)
- **Documentation:** [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts)
**Abstract:** Neural information retrieval (IR) has greatly advanced search and other
knowledge-intensive language tasks. While many neural IR methods encode queries

View File

@@ -32,7 +32,7 @@ The only requirement for a retriever is the ability to accepts a query and retur
In particular, [LangChain's retriever class](https://python.langchain.com/api_reference/core/retrievers/langchain_core.retrievers.BaseRetriever.html#) only requires that the `_get_relevant_documents` method is implemented, which takes a `query: str` and returns a list of [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) objects that are most relevant to the query.
The underlying logic used to get relevant documents is specified by the retriever and can be whatever is most useful for the application.
A LangChain retriever is a [runnable](/docs/how_to/lcel_cheatsheet/), which is a standard interface is for LangChain components.
A LangChain retriever is a [runnable](/docs/how_to/lcel_cheatsheet/), which is a standard interface for LangChain components.
This means that it has a few common methods, including `invoke`, that are used to interact with it. A retriever can be invoked with a query:
```python
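# Illustrative sketch only — the original contents of this code block are
# truncated in this diff view. Assuming langchain-core is installed, it shows
# the two points made above: a custom retriever only needs to implement
# _get_relevant_documents, and any retriever is a runnable you can .invoke().
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever


class KeywordRetriever(BaseRetriever):
    """Toy retriever: naive substring match over an in-memory list of documents."""

    documents: list[Document]
    k: int = 3

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> list[Document]:
        matches = [
            doc for doc in self.documents if query.lower() in doc.page_content.lower()
        ]
        return matches[: self.k]


retriever = KeywordRetriever(
    documents=[Document(page_content="A LangChain retriever is a runnable.")]
)
docs = retriever.invoke("runnable")  # standard runnable interface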

View File

@@ -192,7 +192,7 @@ All Toolkits expose a `get_tools` method which returns a list of tools. You can
```python
# Initialize a toolkit
toolkit = ExampleTookit(...)
toolkit = ExampleToolkit(...)
# Get list of tools
tools = toolkit.get_tools()

View File

@@ -530,7 +530,7 @@
"\n",
" def _run(\n",
" self, a: int, b: int, run_manager: Optional[CallbackManagerForToolRun] = None\n",
" ) -> str:\n",
" ) -> int:\n",
" \"\"\"Use the tool.\"\"\"\n",
" return a * b\n",
"\n",
@@ -539,7 +539,7 @@
" a: int,\n",
" b: int,\n",
" run_manager: Optional[AsyncCallbackManagerForToolRun] = None,\n",
" ) -> str:\n",
" ) -> int:\n",
" \"\"\"Use the tool asynchronously.\"\"\"\n",
" # If the calculation is cheap, you can just delegate to the sync implementation\n",
" # as shown below.\n",

View File

@@ -67,9 +67,34 @@
"When implementing a document loader do **NOT** provide parameters via the `lazy_load` or `alazy_load` methods.\n",
"\n",
"All configuration is expected to be passed through the initializer (__init__). This was a design choice made by LangChain to make sure that once a document loader has been instantiated it has all the information needed to load documents.\n",
":::\n",
"\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "520edbbabde7df6e",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"Install **langchain-core** and **langchain_community**."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "936bd5fc",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain_core langchain_community"
]
},
{
"cell_type": "markdown",
"id": "a93f17a87d323bdd",
"metadata": {},
"source": [
"### Implementation\n",
"\n",
"Let's create an example of a standard document loader that loads a file and creates a document from each line in the file."
@@ -77,9 +102,13 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "20f128c1-1a2c-43b9-9e7b-cf9b3a86d1db",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:56.764714Z",
"start_time": "2025-04-21T08:49:56.623508Z"
},
"tags": []
},
"outputs": [],
@@ -122,7 +151,7 @@
" self,\n",
" ) -> AsyncIterator[Document]: # <-- Does not take any arguments\n",
" \"\"\"An async lazy loader that reads a file line by line.\"\"\"\n",
" # Requires aiofiles (install with pip)\n",
" # Requires aiofiles\n",
" # https://github.com/Tinche/aiofiles\n",
" import aiofiles\n",
"\n",
@@ -151,9 +180,13 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "b1751198-c6dd-4149-95bd-6370ce8fa06f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:56.776521Z",
"start_time": "2025-04-21T08:49:56.773511Z"
},
"tags": []
},
"outputs": [],
@@ -167,9 +200,23 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "c5210428",
"metadata": {},
"outputs": [],
"source": [
"%pip install -q aiofiles"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "71ef1482-f9de-4852-b5a4-0938f350612e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:57.972675Z",
"start_time": "2025-04-21T08:49:57.969411Z"
},
"tags": []
},
"outputs": [
@@ -179,10 +226,12 @@
"text": [
"\n",
"<class 'langchain_core.documents.base.Document'>\n",
"page_content='meow meow🐱 \\n' metadata={'line_number': 0, 'source': './meow.txt'}\n",
"page_content='meow meow🐱 \n",
"' metadata={'line_number': 0, 'source': './meow.txt'}\n",
"\n",
"<class 'langchain_core.documents.base.Document'>\n",
"page_content=' meow meow🐱 \\n' metadata={'line_number': 1, 'source': './meow.txt'}\n",
"page_content=' meow meow🐱 \n",
"' metadata={'line_number': 1, 'source': './meow.txt'}\n",
"\n",
"<class 'langchain_core.documents.base.Document'>\n",
"page_content=' meow😻😻' metadata={'line_number': 2, 'source': './meow.txt'}\n"
@@ -199,9 +248,13 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "1588e78c-e81a-4d40-b36c-634242c84a6a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.028989Z",
"start_time": "2025-04-21T08:49:58.021972Z"
},
"tags": []
},
"outputs": [
@@ -211,10 +264,12 @@
"text": [
"\n",
"<class 'langchain_core.documents.base.Document'>\n",
"page_content='meow meow🐱 \\n' metadata={'line_number': 0, 'source': './meow.txt'}\n",
"page_content='meow meow🐱 \n",
"' metadata={'line_number': 0, 'source': './meow.txt'}\n",
"\n",
"<class 'langchain_core.documents.base.Document'>\n",
"page_content=' meow meow🐱 \\n' metadata={'line_number': 1, 'source': './meow.txt'}\n",
"page_content=' meow meow🐱 \n",
"' metadata={'line_number': 1, 'source': './meow.txt'}\n",
"\n",
"<class 'langchain_core.documents.base.Document'>\n",
"page_content=' meow😻😻' metadata={'line_number': 2, 'source': './meow.txt'}\n"
@@ -245,21 +300,25 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "df5ad46a-9e00-4073-8505-489fc4f3799e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.078111Z",
"start_time": "2025-04-21T08:49:58.071421Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='meow meow🐱 \\n', metadata={'line_number': 0, 'source': './meow.txt'}),\n",
" Document(page_content=' meow meow🐱 \\n', metadata={'line_number': 1, 'source': './meow.txt'}),\n",
" Document(page_content=' meow😻😻', metadata={'line_number': 2, 'source': './meow.txt'})]"
"[Document(metadata={'line_number': 0, 'source': './meow.txt'}, page_content='meow meow🐱 \\n'),\n",
" Document(metadata={'line_number': 1, 'source': './meow.txt'}, page_content=' meow meow🐱 \\n'),\n",
" Document(metadata={'line_number': 2, 'source': './meow.txt'}, page_content=' meow😻😻')]"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -286,9 +345,13 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "209f6a91-2f15-4cb2-9237-f79fc9493b82",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.124363Z",
"start_time": "2025-04-21T08:49:58.120782Z"
},
"tags": []
},
"outputs": [],
@@ -313,9 +376,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "b1275c59-06d4-458f-abd2-fcbad0bde442",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.172506Z",
"start_time": "2025-04-21T08:49:58.167416Z"
},
"tags": []
},
"outputs": [],
@@ -326,21 +393,25 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"id": "56a3d707-2086-413b-ae82-50e92ddb27f6",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.218426Z",
"start_time": "2025-04-21T08:49:58.214684Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='meow meow🐱 \\n', metadata={'line_number': 1, 'source': './meow.txt'}),\n",
" Document(page_content=' meow meow🐱 \\n', metadata={'line_number': 2, 'source': './meow.txt'}),\n",
" Document(page_content=' meow😻😻', metadata={'line_number': 3, 'source': './meow.txt'})]"
"[Document(metadata={'line_number': 1, 'source': './meow.txt'}, page_content='meow meow🐱 \\n'),\n",
" Document(metadata={'line_number': 2, 'source': './meow.txt'}, page_content=' meow meow🐱 \\n'),\n",
" Document(metadata={'line_number': 3, 'source': './meow.txt'}, page_content=' meow😻😻')]"
]
},
"execution_count": 8,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -359,20 +430,24 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"id": "20d03092-ba35-47d7-b612-9d1631c261cd",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.267755Z",
"start_time": "2025-04-21T08:49:58.264369Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='some data from memory\\n', metadata={'line_number': 1, 'source': None}),\n",
" Document(page_content='meow', metadata={'line_number': 2, 'source': None})]"
"[Document(metadata={'line_number': 1, 'source': None}, page_content='some data from memory\\n'),\n",
" Document(metadata={'line_number': 2, 'source': None}, page_content='meow')]"
]
},
"execution_count": 9,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -394,9 +469,13 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 12,
"id": "a9e92e0e-c8da-401c-b8c6-f0676004cf58",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.330432Z",
"start_time": "2025-04-21T08:49:58.327223Z"
},
"tags": []
},
"outputs": [],
@@ -406,9 +485,13 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 13,
"id": "6b559d30-8b0c-4e45-86b1-e4602d9aaa7e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.383905Z",
"start_time": "2025-04-21T08:49:58.380658Z"
},
"tags": []
},
"outputs": [
@@ -418,7 +501,7 @@
"'utf-8'"
]
},
"execution_count": 11,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -429,9 +512,13 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 14,
"id": "2f7b145a-9c6f-47f9-9487-1f4b25aff46f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.443829Z",
"start_time": "2025-04-21T08:49:58.440222Z"
},
"tags": []
},
"outputs": [
@@ -441,7 +528,7 @@
"b'meow meow\\xf0\\x9f\\x90\\xb1 \\n meow meow\\xf0\\x9f\\x90\\xb1 \\n meow\\xf0\\x9f\\x98\\xbb\\xf0\\x9f\\x98\\xbb'"
]
},
"execution_count": 12,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -452,9 +539,13 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 15,
"id": "9b9482fa-c49c-42cd-a2ef-80bc93214631",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.498609Z",
"start_time": "2025-04-21T08:49:58.494903Z"
},
"tags": []
},
"outputs": [
@@ -464,7 +555,7 @@
"'meow meow🐱 \\n meow meow🐱 \\n meow😻😻'"
]
},
"execution_count": 13,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -475,19 +566,23 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 16,
"id": "04cc7a81-290e-4ef8-b7e1-d885fcc59ece",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.551353Z",
"start_time": "2025-04-21T08:49:58.547518Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"<contextlib._GeneratorContextManager at 0x743f34324450>"
"<contextlib._GeneratorContextManager at 0x74b8d42e9940>"
]
},
"execution_count": 14,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -498,9 +593,13 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 17,
"id": "ec8de0ab-51d7-4e41-82c9-3ce0a6fdc2cd",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.599576Z",
"start_time": "2025-04-21T08:49:58.596567Z"
},
"tags": []
},
"outputs": [
@@ -510,7 +609,7 @@
"{'foo': 'bar'}"
]
},
"execution_count": 15,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -521,9 +620,13 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 18,
"id": "19eae991-ae48-43c2-8952-7347cdb76a34",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.649634Z",
"start_time": "2025-04-21T08:49:58.646313Z"
},
"tags": []
},
"outputs": [
@@ -533,7 +636,7 @@
"'./meow.txt'"
]
},
"execution_count": 16,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -551,65 +654,50 @@
"\n",
"While a parser encapsulates the logic needed to parse binary data into documents, *blob loaders* encapsulate the logic that's necessary to load blobs from a given storage location.\n",
"\n",
"At the moment, `LangChain` only supports `FileSystemBlobLoader`.\n",
"At the moment, `LangChain` supports `FileSystemBlobLoader` and `CloudBlobLoader`.\n",
"\n",
"You can use the `FileSystemBlobLoader` to load blobs and then use the parser to parse them."
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 19,
"id": "c093becb-2e84-4329-89e3-956a3bd765e5",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:49:58.718259Z",
"start_time": "2025-04-21T08:49:58.705367Z"
},
"tags": []
},
"outputs": [],
"source": [
"from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader\n",
"\n",
"blob_loader = FileSystemBlobLoader(path=\".\", glob=\"*.mdx\", show_progress=True)"
"filesystem_blob_loader = FileSystemBlobLoader(\n",
" path=\".\", glob=\"*.mdx\", show_progress=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "77739dab-2a1e-4b64-8daa-fee8aa029972",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "45e85d3f63224bb59db02a40ae2e3268",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='# Microsoft Office\\n' metadata={'line_number': 1, 'source': 'office_file.mdx'}\n",
"page_content='# Markdown\\n' metadata={'line_number': 1, 'source': 'markdown.mdx'}\n",
"page_content='# JSON\\n' metadata={'line_number': 1, 'source': 'json.mdx'}\n",
"page_content='---\\n' metadata={'line_number': 1, 'source': 'pdf.mdx'}\n",
"page_content='---\\n' metadata={'line_number': 1, 'source': 'index.mdx'}\n",
"page_content='# File Directory\\n' metadata={'line_number': 1, 'source': 'file_directory.mdx'}\n",
"page_content='# CSV\\n' metadata={'line_number': 1, 'source': 'csv.mdx'}\n",
"page_content='# HTML\\n' metadata={'line_number': 1, 'source': 'html.mdx'}\n"
]
}
],
"execution_count": null,
"id": "21b91bad",
"metadata": {},
"outputs": [],
"source": [
"%pip install -q tqdm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40be670b",
"metadata": {},
"outputs": [],
"source": [
"parser = MyParser()\n",
"for blob in blob_loader.yield_blobs():\n",
"for blob in filesystem_blob_loader.yield_blobs():\n",
" for doc in parser.lazy_parse(blob):\n",
" print(doc)\n",
" break"
@@ -620,56 +708,104 @@
"id": "f016390c-d38b-4261-946d-34eefe546df7",
"metadata": {},
"source": [
"### Generic Loader\n",
"\n",
"LangChain has a `GenericLoader` abstraction which composes a `BlobLoader` with a `BaseBlobParser`.\n",
"\n",
"`GenericLoader` is meant to provide standardized classmethods that make it easy to use existing `BlobLoader` implementations. At the moment, only the `FileSystemBlobLoader` is supported."
"Or, you can use `CloudBlobLoader` to load blobs from a cloud storage location (Supports s3://, az://, gs://, file:// schemes)."
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "1de74daf-70ee-4616-9089-d28e26b16851",
"execution_count": null,
"id": "8210714e",
"metadata": {},
"outputs": [],
"source": [
"%pip install -q 'cloudpathlib[s3]'"
]
},
{
"cell_type": "markdown",
"id": "d3f84501-b0aa-4a60-aad2-5109cbd37d4f",
"metadata": {},
"source": [
"```python\n",
"from cloudpathlib import S3Client, S3Path\n",
"from langchain_community.document_loaders.blob_loaders import CloudBlobLoader\n",
"\n",
"client = S3Client(no_sign_request=True)\n",
"client.set_as_default_client()\n",
"\n",
"path = S3Path(\n",
" \"s3://bucket-01\", client=client\n",
") # Supports s3://, az://, gs://, file:// schemes.\n",
"\n",
"cloud_loader = CloudBlobLoader(path, glob=\"**/*.pdf\", show_progress=True)\n",
"\n",
"for blob in cloud_loader.yield_blobs():\n",
" print(blob)\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "40c361ba4cd30164",
"metadata": {},
"source": [
"### Generic Loader\n",
"\n",
"LangChain has a `GenericLoader` abstraction which composes a `BlobLoader` with a `BaseBlobParser`.\n",
"\n",
"`GenericLoader` is meant to provide standardized classmethods that make it easy to use existing `BlobLoader` implementations. At the moment, the `FileSystemBlobLoader` and `CloudBlobLoader` are supported. See example below:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "5dfb2be02fe662c5",
"metadata": {
"tags": []
"ExecuteTime": {
"end_time": "2025-04-21T08:50:16.244917Z",
"start_time": "2025-04-21T08:50:15.527562Z"
}
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5f1f6810a71a4909ac9fe1e8f8cb9e0a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 7/7 [00:00<00:00, 1224.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='# Microsoft Office\\n' metadata={'line_number': 1, 'source': 'office_file.mdx'}\n",
"page_content='\\n' metadata={'line_number': 2, 'source': 'office_file.mdx'}\n",
"page_content='>[The Microsoft Office](https://www.office.com/) suite of productivity software includes Microsoft Word, Microsoft Excel, Microsoft PowerPoint, Microsoft Outlook, and Microsoft OneNote. It is available for Microsoft Windows and macOS operating systems. It is also available on Android and iOS.\\n' metadata={'line_number': 3, 'source': 'office_file.mdx'}\n",
"page_content='\\n' metadata={'line_number': 4, 'source': 'office_file.mdx'}\n",
"page_content='This covers how to load commonly used file formats including `DOCX`, `XLSX` and `PPTX` documents into a document format that we can use downstream.\\n' metadata={'line_number': 5, 'source': 'office_file.mdx'}\n",
"page_content='# Text embedding models\n",
"' metadata={'line_number': 1, 'source': 'embed_text.mdx'}\n",
"page_content='\n",
"' metadata={'line_number': 2, 'source': 'embed_text.mdx'}\n",
"page_content=':::info\n",
"' metadata={'line_number': 3, 'source': 'embed_text.mdx'}\n",
"page_content='Head to [Integrations](/docs/integrations/text_embedding/) for documentation on built-in integrations with text embedding model providers.\n",
"' metadata={'line_number': 4, 'source': 'embed_text.mdx'}\n",
"page_content=':::\n",
"' metadata={'line_number': 5, 'source': 'embed_text.mdx'}\n",
"... output truncated for demo purposes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"from langchain_community.document_loaders.generic import GenericLoader\n",
"\n",
"loader = GenericLoader.from_filesystem(\n",
" path=\".\", glob=\"*.mdx\", show_progress=True, parser=MyParser()\n",
"generic_loader_filesystem = GenericLoader(\n",
" blob_loader=filesystem_blob_loader, blob_parser=parser\n",
")\n",
"\n",
"for idx, doc in enumerate(loader.lazy_load()):\n",
"for idx, doc in enumerate(generic_loader_filesystem.lazy_load()):\n",
" if idx < 5:\n",
" print(doc)\n",
"\n",
@@ -690,9 +826,13 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 28,
"id": "23633102-dc44-4fed-a4e1-8159489101c8",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:50:34.841862Z",
"start_time": "2025-04-21T08:50:34.838375Z"
},
"tags": []
},
"outputs": [],
@@ -709,37 +849,46 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 29,
"id": "dc95be85-4a29-4c6f-a260-08afa3c95538",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-21T08:50:34.901734Z",
"start_time": "2025-04-21T08:50:34.888098Z"
},
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4320598ea3b44a52b1873e1c801db312",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/8 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 7/7 [00:00<00:00, 814.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='# Microsoft Office\\n' metadata={'line_number': 1, 'source': 'office_file.mdx'}\n",
"page_content='\\n' metadata={'line_number': 2, 'source': 'office_file.mdx'}\n",
"page_content='>[The Microsoft Office](https://www.office.com/) suite of productivity software includes Microsoft Word, Microsoft Excel, Microsoft PowerPoint, Microsoft Outlook, and Microsoft OneNote. It is available for Microsoft Windows and macOS operating systems. It is also available on Android and iOS.\\n' metadata={'line_number': 3, 'source': 'office_file.mdx'}\n",
"page_content='\\n' metadata={'line_number': 4, 'source': 'office_file.mdx'}\n",
"page_content='This covers how to load commonly used file formats including `DOCX`, `XLSX` and `PPTX` documents into a document format that we can use downstream.\\n' metadata={'line_number': 5, 'source': 'office_file.mdx'}\n",
"page_content='# Text embedding models\n",
"' metadata={'line_number': 1, 'source': 'embed_text.mdx'}\n",
"page_content='\n",
"' metadata={'line_number': 2, 'source': 'embed_text.mdx'}\n",
"page_content=':::info\n",
"' metadata={'line_number': 3, 'source': 'embed_text.mdx'}\n",
"page_content='Head to [Integrations](/docs/integrations/text_embedding/) for documentation on built-in integrations with text embedding model providers.\n",
"' metadata={'line_number': 4, 'source': 'embed_text.mdx'}\n",
"page_content=':::\n",
"' metadata={'line_number': 5, 'source': 'embed_text.mdx'}\n",
"... output truncated for demo purposes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
@@ -769,7 +918,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.10.4"
}
},
"nbformat": 4,

View File

@@ -20,7 +20,7 @@
"\n",
"LangChain integrates with a host of PDF parsers. Some are simple and relatively low-level; others will support OCR and image-processing, or perform advanced document layout analysis. The right choice will depend on your needs. Below we enumerate the possibilities.\n",
"\n",
"We will demonstrate these approaches on a [sample file](https://github.com/langchain-ai/langchain/blob/master/libs/community/tests/integration_tests/examples/layout-parser-paper.pdf):"
"We will demonstrate these approaches on a [sample file](https://github.com/langchain-ai/langchain-community/blob/main/libs/community/tests/examples/layout-parser-paper.pdf):"
]
},
{

View File

@@ -167,7 +167,7 @@
"She was, in 1906, the first woman to become a professor at the University of Paris.\n",
"\"\"\"\n",
"documents = [Document(page_content=text)]\n",
"graph_documents = llm_transformer.convert_to_graph_documents(documents)\n",
"graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)\n",
"print(f\"Nodes:{graph_documents[0].nodes}\")\n",
"print(f\"Relationships:{graph_documents[0].relationships}\")"
]
@@ -205,7 +205,7 @@
" allowed_nodes=[\"Person\", \"Country\", \"Organization\"],\n",
" allowed_relationships=[\"NATIONALITY\", \"LOCATED_IN\", \"WORKED_AT\", \"SPOUSE\"],\n",
")\n",
"graph_documents_filtered = llm_transformer_filtered.convert_to_graph_documents(\n",
"graph_documents_filtered = await llm_transformer_filtered.aconvert_to_graph_documents(\n",
" documents\n",
")\n",
"print(f\"Nodes:{graph_documents_filtered[0].nodes}\")\n",
@@ -245,7 +245,9 @@
" allowed_nodes=[\"Person\", \"Country\", \"Organization\"],\n",
" allowed_relationships=allowed_relationships,\n",
")\n",
"graph_documents_filtered = llm_transformer_tuple.convert_to_graph_documents(documents)\n",
"graph_documents_filtered = await llm_transformer_tuple.aconvert_to_graph_documents(\n",
" documents\n",
")\n",
"print(f\"Nodes:{graph_documents_filtered[0].nodes}\")\n",
"print(f\"Relationships:{graph_documents_filtered[0].relationships}\")"
]
@@ -289,7 +291,9 @@
" allowed_relationships=[\"NATIONALITY\", \"LOCATED_IN\", \"WORKED_AT\", \"SPOUSE\"],\n",
" node_properties=[\"born_year\"],\n",
")\n",
"graph_documents_props = llm_transformer_props.convert_to_graph_documents(documents)\n",
"graph_documents_props = await llm_transformer_props.aconvert_to_graph_documents(\n",
" documents\n",
")\n",
"print(f\"Nodes:{graph_documents_props[0].nodes}\")\n",
"print(f\"Relationships:{graph_documents_props[0].relationships}\")"
]
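The hunks above switch these calls to the transformer's async counterpart. A notebook can `await` them at the top level, but a plain script needs an event loop; a hedged sketch, assuming `llm_transformer` and `documents` are defined as in the surrounding cells:

```python
import asyncio

# Run the async conversion outside a notebook (a notebook can simply `await` it).
graph_documents = asyncio.run(llm_transformer.aconvert_to_graph_documents(documents))
print(f"Nodes:{graph_documents[0].nodes}")
```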

View File

@@ -100,7 +100,7 @@
"id": "8554bae5",
"metadata": {},
"source": [
"A chat prompt is made up a of a list of messages. Similarly to the above example, we can concatenate chat prompt templates. Each new element is a new message in the final prompt.\n",
"A chat prompt is made up of a list of messages. Similarly to the above example, we can concatenate chat prompt templates. Each new element is a new message in the final prompt.\n",
"\n",
"First, let's initialize the a [`ChatPromptTemplate`](https://python.langchain.com/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html) with a [`SystemMessage`](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.system.SystemMessage.html)."
]

View File

@@ -162,7 +162,7 @@
"\n",
"table_chain = prompt | llm_with_tools | output_parser\n",
"\n",
"table_chain.invoke({\"input\": \"What are all the genres of Alanis Morisette songs\"})"
"table_chain.invoke({\"input\": \"What are all the genres of Alanis Morissette songs\"})"
]
},
{
@@ -206,7 +206,7 @@
")\n",
"\n",
"category_chain = prompt | llm_with_tools | output_parser\n",
"category_chain.invoke({\"input\": \"What are all the genres of Alanis Morisette songs\"})"
"category_chain.invoke({\"input\": \"What are all the genres of Alanis Morissette songs\"})"
]
},
{
@@ -261,7 +261,7 @@
"\n",
"\n",
"table_chain = category_chain | get_tables\n",
"table_chain.invoke({\"input\": \"What are all the genres of Alanis Morisette songs\"})"
"table_chain.invoke({\"input\": \"What are all the genres of Alanis Morissette songs\"})"
]
},
{
@@ -313,7 +313,7 @@
],
"source": [
"query = full_chain.invoke(\n",
" {\"question\": \"What are all the genres of Alanis Morisette songs\"}\n",
" {\"question\": \"What are all the genres of Alanis Morissette songs\"}\n",
")\n",
"print(query)"
]
@@ -346,7 +346,7 @@
"source": [
"We can see the LangSmith trace for this run [here](https://smith.langchain.com/public/4fbad408-3554-4f33-ab47-1e510a1b52a3/r).\n",
"\n",
"We've seen how to dynamically include a subset of table schemas in a prompt within a chain. Another possible approach to this problem is to let an Agent decide for itself when to look up tables by giving it a Tool to do so. You can see an example of this in the [SQL: Agents](/docs/tutorials/agents) guide."
"We've seen how to dynamically include a subset of table schemas in a prompt within a chain. Another possible approach to this problem is to let an Agent decide for itself when to look up tables by giving it a Tool to do so. You can see an example of this in the [SQL: Agents](/docs/tutorials/sql_qa/#agents) guide."
]
},
{
@@ -555,7 +555,7 @@
"source": [
"We can see that with retrieval we're able to correct the spelling from \"Elenis Moriset\" to \"Alanis Morissette\" and get back a valid result.\n",
"\n",
"Another possible approach to this problem is to let an Agent decide for itself when to look up proper nouns. You can see an example of this in the [SQL: Agents](/docs/tutorials/agents) guide."
"Another possible approach to this problem is to let an Agent decide for itself when to look up proper nouns. You can see an example of this in the [SQL: Agents](/docs/tutorials/sql_qa/#agents) guide."
]
}
],

View File

@@ -6,9 +6,9 @@
"source": [
"# How to disable parallel tool calling\n",
"\n",
":::info OpenAI-specific\n",
":::info Provider-specific\n",
"\n",
"This API is currently only supported by OpenAI.\n",
"This API is currently only supported by OpenAI and Anthropic.\n",
"\n",
":::\n",
"\n",
@@ -55,12 +55,12 @@
"import os\n",
"from getpass import getpass\n",
"\n",
"from langchain_openai import ChatOpenAI\n",
"from langchain.chat_models import init_chat_model\n",
"\n",
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0)"
"llm = init_chat_model(\"openai:gpt-4.1-mini\")"
]
},
{
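The hunk above only swaps the model setup; the actual point of the page, binding tools with `parallel_tool_calls=False` so the model issues at most one tool call per turn, is not shown in this diff. A rough sketch follows, where the `add`/`multiply` tools are assumptions standing in for the notebook's own tool definitions and `llm` is the model created in the cell above (OpenAI models accept the `parallel_tool_calls` flag).

```python
from langchain_core.tools import tool


@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


# Disable parallel tool calling when binding the tools to the model.
llm_with_tools = llm.bind_tools([add, multiply], parallel_tool_calls=False)

result = llm_with_tools.invoke("Please call the first tool two times")
print(len(result.tool_calls))  # expect a single tool call
```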
@@ -121,7 +121,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.10.4"
}
},
"nbformat": 4,

View File

@@ -83,21 +83,28 @@ agent_executor.run("how many letters in the word educa?", callbacks=[handler])
Another example:
```python
from langchain.agents import load_tools, initialize_agent, AgentType
from langchain_openai import OpenAI
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler
import os
from langchain_community.agent_toolkits.load_tools import load_tools
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
os.environ["LLMONITOR_APP_ID"] = ""
os.environ["OPENAI_API_KEY"] = ""
os.environ["SERPAPI_API_KEY"] = ""
handler = LLMonitorCallbackHandler()
llm = OpenAI(temperature=0)
llm = ChatOpenAI(temperature=0, callbacks=[handler])
tools = load_tools(["serpapi", "llm-math"], llm=llm)
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, metadata={ "agent_name": "GirlfriendAgeFinder" }) # <- recommended, assign a custom name
agent = create_react_agent("openai:gpt-4.1-mini", tools)
agent.run(
"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?",
callbacks=[handler],
)
input_message = {
"role": "user",
"content": "What's the weather in SF?",
}
agent.invoke({"messages": [input_message]})
```
## User Tracking
@@ -110,7 +117,7 @@ with identify("user-123"):
llm.invoke("Tell me a joke")
with identify("user-456", user_props={"email": "user456@test.com"}):
agent.run("Who is Leo DiCaprio's girlfriend?")
agent.invoke(...)
```
## Support

File diff suppressed because it is too large

View File

@@ -41,7 +41,7 @@
"### Credentials\n",
"\n",
"\n",
"Head to https://www.cloudflare.com/developer-platform/products/workers-ai/ to sign up to CloudflareWorkersAI and generate an API key. Once you've done this set the CF_API_KEY environment variable and the CF_ACCOUNT_ID environment variable:"
"Head to https://www.cloudflare.com/developer-platform/products/workers-ai/ to sign up to CloudflareWorkersAI and generate an API key. Once you've done this set the CF_AI_API_KEY environment variable and the CF_ACCOUNT_ID environment variable:"
]
},
{
@@ -56,8 +56,8 @@
"import getpass\n",
"import os\n",
"\n",
"if not os.getenv(\"CF_API_KEY\"):\n",
" os.environ[\"CF_API_KEY\"] = getpass.getpass(\n",
"if not os.getenv(\"CF_AI_API_KEY\"):\n",
" os.environ[\"CF_AI_API_KEY\"] = getpass.getpass(\n",
" \"Enter your CloudflareWorkersAI API key: \"\n",
" )\n",
"\n",

View File

@@ -0,0 +1,308 @@
{
"cells": [
{
"cell_type": "raw",
"id": "afaf8039",
"metadata": {
"vscode": {
"languageId": "raw"
}
},
"source": [
"---\n",
"sidebar_label: Featherless AI\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "e49f1e0d",
"metadata": {},
"source": [
"# ChatFeatherlessAi\n",
"\n",
"\n",
"This will help you getting started with FeatherlessAi [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatFeatherlessAi features and configurations head to the [API reference](https://python.langchain.com/api_reference/__package_name_short_snake__/chat_models/__module_name__.chat_models.ChatFeatherlessAi.html).\n",
"\n",
"- See https://featherless.ai/ for an example.\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/__package_name_short_snake__) | Package downloads | Package latest |\n",
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
"| [ChatFeatherlessAi](https://python.langchain.com/api_reference/__package_name_short_snake__/chat_models/__module_name__.chat_models.ChatFeatherlessAi.html) | [langchain-featherless-ai](https://python.langchain.com/api_reference/__package_name_short_snake__/) | ✅ | ❌ | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-featherless-ai?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-featherless-ai?style=flat-square&label=%20) |\n",
"\n",
"### Model features\n",
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
"| ❌ | ❌ | ✅| ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"\n",
"To access Featherless AI models you'll need to create a/an Featherless AI account, get an API key, and install the `langchain-featherless-ai` integration package.\n",
"\n",
"### Credentials\n",
"\n",
"\n",
"Head to https://featherless.ai/ to sign up to FeatherlessAI and generate an API key. Once you've done this set the FEATHERLESSAI_API_KEY environment variable:"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "433e8d2b-9519-4b49-b2c4-7ab65b046c94",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"if not os.getenv(\"FEATHERLESSAI_API_KEY\"):\n",
" os.environ[\"FEATHERLESSAI_API_KEY\"] = getpass.getpass(\n",
" \"Enter your FeatherlessAI API key: \"\n",
" )"
]
},
{
"cell_type": "markdown",
"id": "72ee0c4b-9764-423a-9dbf-95129e185210",
"metadata": {},
"source": [
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
]
},
{
"cell_type": "markdown",
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"The LangChain FeatherlessAi integration lives in the `langchain-featherless-ai` package:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install -qU langchain-featherless-ai"
]
},
{
"cell_type": "markdown",
"id": "a38cde65-254d-4219-a441-068766c0d4b5",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our model object and generate chat completions:\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
"metadata": {},
"outputs": [],
"source": [
"from langchain_featherless_ai import ChatFeatherlessAi\n",
"\n",
"llm = ChatFeatherlessAi(\n",
" model=\"featherless-ai/Qwerky-72B\",\n",
" temperature=0.9,\n",
" max_tokens=None,\n",
" timeout=None,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "2b4f3e15",
"metadata": {},
"source": [
"## Invocation"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "62e0dbc3",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Python311\\Lib\\site-packages\\pydantic\\main.py:463: UserWarning: Pydantic serializer warnings:\n",
" PydanticSerializationUnexpectedValue(Expected `int` - serialized value may not be as expected [input_value=1747322408.706, input_type=float])\n",
" return self.__pydantic_serializer__.to_python(\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content=\"J'aime programmer.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 27, 'total_tokens': 32, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'featherless-ai/Qwerky-72B', 'system_fingerprint': '', 'id': 'G1sgui', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--6ecbe184-c94e-4d03-bf75-9bd85b04ba5b-0', usage_metadata={'input_tokens': 27, 'output_tokens': 5, 'total_tokens': 32, 'input_token_details': {}, 'output_token_details': {}})"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"messages = [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
" ),\n",
" (\"human\", \"I love programming.\"),\n",
"]\n",
"ai_msg = llm.invoke(messages)\n",
"ai_msg"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"J'aime programmer.\n"
]
}
],
"source": [
"print(ai_msg.content)"
]
},
{
"cell_type": "markdown",
"id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
"metadata": {},
"source": [
"## Chaining\n",
"\n",
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fca9e713",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 24,
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Python311\\Lib\\site-packages\\pydantic\\main.py:463: UserWarning: Pydantic serializer warnings:\n",
" PydanticSerializationUnexpectedValue(Expected `int` - serialized value may not be as expected [input_value=1747322423.487, input_type=float])\n",
" return self.__pydantic_serializer__.to_python(\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content='Ich liebe Programmieren.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 22, 'total_tokens': 27, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'featherless-ai/Qwerky-72B', 'system_fingerprint': '', 'id': 'BoBqht', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--67464357-83d1-4591-9a62-303ed74b8148-0', usage_metadata={'input_tokens': 22, 'output_tokens': 5, 'total_tokens': 27, 'input_token_details': {}, 'output_token_details': {}})"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"prompt = ChatPromptTemplate(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
" ),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | llm\n",
"chain.invoke(\n",
" {\n",
" \"input_language\": \"English\",\n",
" \"output_language\": \"German\",\n",
" \"input\": \"I love programming.\",\n",
" }\n",
")"
]
},
{
"cell_type": "markdown",
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all ChatFeatherlessAi features and configurations head to the [API reference](https://python.langchain.com/api_reference/__package_name_short_snake__/chat_models/.chat_models.ChatFeatherlessAi.html)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,117 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# GigaChat\n",
"This notebook shows how to use LangChain with [GigaChat](https://developers.sber.ru/portal/products/gigachat).\n",
"To use you need to install ```langchain_gigachat``` python package."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"pycharm": {
"is_executing": true
}
},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-gigachat"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"To get GigaChat credentials you need to [create account](https://developers.sber.ru/studio/login) and [get access to API](https://developers.sber.ru/docs/ru/gigachat/individuals-quickstart)\n",
"\n",
"## Example"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"if \"GIGACHAT_CREDENTIALS\" not in os.environ:\n",
" os.environ[\"GIGACHAT_CREDENTIALS\"] = getpass()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from langchain_gigachat import GigaChat\n",
"\n",
"chat = GigaChat(verify_ssl_certs=False, scope=\"GIGACHAT_API_PERS\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The capital of Russia is Moscow.\n"
]
}
],
"source": [
"from langchain_core.messages import HumanMessage, SystemMessage\n",
"\n",
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful AI that shares everything you know. Talk in English.\"\n",
" ),\n",
" HumanMessage(content=\"What is capital of Russia?\"),\n",
"]\n",
"\n",
"print(chat.invoke(messages).content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long

View File

@@ -915,6 +915,175 @@
"response_2.text()"
]
},
{
"cell_type": "markdown",
"id": "34ad0015-688c-4274-be55-93268b44f558",
"metadata": {},
"source": [
"#### Code interpreter\n",
"\n",
"OpenAI implements a [code interpreter](https://platform.openai.com/docs/guides/tools-code-interpreter) tool to support the sandboxed generation and execution of code.\n",
"\n",
"Example use:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "34826aae-6d48-4b84-bc00-89594a87d461",
"metadata": {},
"outputs": [],
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"o4-mini\", use_responses_api=True)\n",
"\n",
"llm_with_tools = llm.bind_tools(\n",
" [\n",
" {\n",
" \"type\": \"code_interpreter\",\n",
" # Create a new container\n",
" \"container\": {\"type\": \"auto\"},\n",
" }\n",
" ]\n",
")\n",
"response = llm_with_tools.invoke(\n",
" \"Write and run code to answer the question: what is 3^3?\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "1b4d92b9-941f-4d54-93a5-b0c73afd66b2",
"metadata": {},
"source": [
"Note that the above command created a new container. We can also specify an existing container ID:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d8c82895-5011-4062-a1bb-278ec91321e9",
"metadata": {},
"outputs": [],
"source": [
"tool_outputs = response.additional_kwargs[\"tool_outputs\"]\n",
"assert len(tool_outputs) == 1\n",
"# highlight-next-line\n",
"container_id = tool_outputs[0][\"container_id\"]\n",
"\n",
"llm_with_tools = llm.bind_tools(\n",
" [\n",
" {\n",
" \"type\": \"code_interpreter\",\n",
" # Use an existing container\n",
" # highlight-next-line\n",
" \"container\": container_id,\n",
" }\n",
" ]\n",
")"
]
},
{
"cell_type": "markdown",
"id": "8db30501-522c-4915-963d-d60539b5c16e",
"metadata": {},
"source": [
"#### Remote MCP\n",
"\n",
"OpenAI implements a [remote MCP](https://platform.openai.com/docs/guides/tools-remote-mcp) tool that allows for model-generated calls to MCP servers.\n",
"\n",
"Example use:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "7044a87b-8b99-49e8-8ca4-e2a8ae49f65a",
"metadata": {},
"outputs": [],
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"o4-mini\", use_responses_api=True)\n",
"\n",
"llm_with_tools = llm.bind_tools(\n",
" [\n",
" {\n",
" \"type\": \"mcp\",\n",
" \"server_label\": \"deepwiki\",\n",
" \"server_url\": \"https://mcp.deepwiki.com/mcp\",\n",
" \"require_approval\": \"never\",\n",
" }\n",
" ]\n",
")\n",
"response = llm_with_tools.invoke(\n",
" \"What transport protocols does the 2025-03-26 version of the MCP \"\n",
" \"spec (modelcontextprotocol/modelcontextprotocol) support?\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "0ed7494e-425d-4bdf-ab83-3164757031dd",
"metadata": {},
"source": [
"<details>\n",
"<summary>MCP Approvals</summary>\n",
"\n",
"OpenAI will at times request approval before sharing data with a remote MCP server.\n",
"\n",
"In the above command, we instructed the model to never require approval. We can also configure the model to always request approval, or to always request approval for specific tools:\n",
"\n",
"```python\n",
"llm_with_tools = llm.bind_tools(\n",
" [\n",
" {\n",
" \"type\": \"mcp\",\n",
" \"server_label\": \"deepwiki\",\n",
" \"server_url\": \"https://mcp.deepwiki.com/mcp\",\n",
" \"require_approval\": {\n",
" \"always\": {\n",
" \"tool_names\": [\"read_wiki_structure\"]\n",
" }\n",
" }\n",
" }\n",
" ]\n",
")\n",
"response = llm_with_tools.invoke(\n",
" \"What transport protocols does the 2025-03-26 version of the MCP \"\n",
" \"spec (modelcontextprotocol/modelcontextprotocol) support?\"\n",
")\n",
"```\n",
"\n",
"Responses may then include blocks with type `\"mcp_approval_request\"`.\n",
"\n",
"To submit approvals for an approval request, structure it into a content block in an input message:\n",
"\n",
"```python\n",
"approval_message = {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"mcp_approval_response\",\n",
" \"approve\": True,\n",
" \"approval_request_id\": output[\"id\"],\n",
" }\n",
" for output in response.additional_kwargs[\"tool_outputs\"]\n",
" if output[\"type\"] == \"mcp_approval_request\"\n",
" ]\n",
"}\n",
"\n",
"next_response = llm_with_tools.invoke(\n",
" [approval_message],\n",
" # continue existing thread\n",
" previous_response_id=response.response_metadata[\"id\"]\n",
")\n",
"```\n",
"\n",
"</details>"
]
},
{
"cell_type": "markdown",
"id": "6fda05f0-4b81-4709-9407-f316d760ad50",

View File

@@ -14,7 +14,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API."
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
"> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
"> through an easy-to-use JSON API."
]
},
{

View File

@@ -6,20 +6,54 @@
"source": [
"# Confluence\n",
"\n",
">[Confluence](https://www.atlassian.com/software/confluence) is a wiki collaboration platform that saves and organizes all of the project-related material. `Confluence` is a knowledge base that primarily handles content management activities. \n",
"[Confluence](https://www.atlassian.com/software/confluence) is a wiki collaboration platform designed to save and organize all project-related materials. As a knowledge base, Confluence primarily serves content management activities.\n",
"\n",
"A loader for `Confluence` pages.\n",
"This loader allows you to fetch and process Confluence pages into `Document` objects.\n",
"\n",
"---\n",
"\n",
"This currently supports `username/api_key`, `Oauth2 login`, `cookies`. Additionally, on-prem installations also support `token` authentication. \n",
"## Authentication Methods\n",
"\n",
"The following authentication methods are supported:\n",
"\n",
"Specify a list `page_id`-s and/or `space_key` to load in the corresponding pages into Document objects, if both are specified the union of both sets will be returned.\n",
"- `username/api_key`\n",
"- `OAuth2 login`\n",
"- `cookies`\n",
"- On-premises installations: `token` authentication\n",
"\n",
"---\n",
"\n",
"You can also specify a boolean `include_attachments` to include attachments, this is set to False by default, if set to True all attachments will be downloaded and ConfluenceReader will extract the text from the attachments and add it to the Document object. Currently supported attachment types are: `PDF`, `PNG`, `JPEG/JPG`, `SVG`, `Word` and `Excel`.\n",
"## Page Selection\n",
"\n",
"Hint: `space_key` and `page_id` can both be found in the URL of a page in Confluence - https://yoursite.atlassian.com/wiki/spaces/&lt;space_key&gt;/pages/&lt;page_id&gt;\n"
"You can specify which pages to load using:\n",
"\n",
"- **page_ids** (*list*): \n",
" A list of `page_id` values to load the corresponding pages.\n",
"\n",
"- **space_key** (*string*): \n",
" A string of `space_key` value to load all pages within the specified confluence space.\n",
"\n",
"If both `page_ids` and `space_key` are provided, the loader will return the union of pages from both lists.\n",
"\n",
"*Hint:* Both `space_key` and `page_id` can be found in the URL of a Confluence page: \n",
"`https://yoursite.atlassian.com/wiki/spaces/{space_key}/pages/{page_id}`\n",
"\n",
"---\n",
"\n",
"## Attachments\n",
"\n",
"You may include attachments in the loaded `Document` objects by setting the boolean parameter **include_attachments** to `True` (default: `False`). When enabled, all attachments are downloaded and their text content is extracted and added to the Document.\n",
"\n",
"**Currently supported attachment types:**\n",
"\n",
"- PDF (`.pdf`)\n",
"- PNG (`.png`)\n",
"- JPEG/JPG (`.jpeg`, `.jpg`)\n",
"- SVG (`.svg`)\n",
"- Word (`.doc`, `.docx`)\n",
"- Excel (`.xls`, `.xlsx`)\n",
"\n",
"---"
]
},
{
@@ -70,9 +104,14 @@
"from langchain_community.document_loaders import ConfluenceLoader\n",
"\n",
"loader = ConfluenceLoader(\n",
" url=\"https://yoursite.atlassian.com/wiki\", username=\"me\", api_key=\"12345\"\n",
" url=\"https://yoursite.atlassian.com/wiki\",\n",
" username=\"<your-confluence-username>\",\n",
" api_key=\"<your-api-token>\",\n",
" space_key=\"<your-space-key>\",\n",
" include_attachments=True,\n",
" limit=50,\n",
")\n",
"documents = loader.load(space_key=\"SPACE\", include_attachments=True, limit=50)"
"documents = loader.load()"
]
},
{
@@ -95,10 +134,15 @@
"source": [
"from langchain_community.document_loaders import ConfluenceLoader\n",
"\n",
"loader = ConfluenceLoader(url=\"https://yoursite.atlassian.com/wiki\", token=\"12345\")\n",
"documents = loader.load(\n",
" space_key=\"SPACE\", include_attachments=True, limit=50, max_pages=50\n",
")"
"loader = ConfluenceLoader(\n",
" url=\"https://confluence.yoursite.com/\",\n",
" token=\"<your-personal-access-token>\",\n",
" space_key=\"<your-space-key>\",\n",
" include_attachments=True,\n",
" limit=50,\n",
" max_pages=50,\n",
")\n",
"documents = loader.load()"
]
}
],

View File

@@ -57,7 +57,7 @@
},
"outputs": [],
"source": [
"%pip install -upgrade --quiet langchain-google-firestore"
"%pip install --upgrade --quiet langchain-google-firestore"
]
},
{

View File

@@ -1214,9 +1214,7 @@
"source": [
"### Connecting to the DB\n",
"\n",
"The Cassandra caches shown in this page can be used with Cassandra as well as other derived databases, such as Astra DB, which use the CQL (Cassandra Query Language) protocol.\n",
"\n",
"> DataStax [Astra DB](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths.\n",
"The Cassandra caches shown in this page can be used with Cassandra as well as other derived databases that can use the CQL (Cassandra Query Language) protocol, such as DataStax Astra DB.\n",
"\n",
"Depending on whether you connect to a Cassandra cluster or to Astra DB through CQL, you will provide different parameters when instantiating the cache (through initialization of a CassIO connection)."
]
@@ -1517,6 +1515,12 @@
"source": [
"You can easily use [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as an LLM cache, with either the \"exact\" or the \"semantic-based\" cache.\n",
"\n",
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
"> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
"> through an easy-to-use JSON API.\n",
"\n",
"_This approach differs from the `Cassandra` caches mentioned above in that it natively uses the HTTP Data API. The Data API is specific to Astra DB. Keep in mind that the storage format will also differ._\n",
"\n",
"Make sure you have a running database (it must be a Vector-enabled database to use the Semantic cache) and get the required credentials on your Astra dashboard:\n",
"\n",
"- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
@@ -3160,7 +3164,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.0"
}
},
"nbformat": 4,

View File

@@ -1,118 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# GigaChat\n",
"This notebook shows how to use LangChain with [GigaChat](https://developers.sber.ru/portal/products/gigachat).\n",
"To use you need to install ```gigachat``` python package."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"pycharm": {
"is_executing": true
}
},
"outputs": [],
"source": [
"%pip install --upgrade --quiet gigachat"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"To get GigaChat credentials you need to [create account](https://developers.sber.ru/studio/login) and [get access to API](https://developers.sber.ru/docs/ru/gigachat/individuals-quickstart)\n",
"\n",
"## Example"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"if \"GIGACHAT_CREDENTIALS\" not in os.environ:\n",
" os.environ[\"GIGACHAT_CREDENTIALS\"] = getpass()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from langchain_community.llms import GigaChat\n",
"\n",
"llm = GigaChat(verify_ssl_certs=False, scope=\"GIGACHAT_API_PERS\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The capital of Russia is Moscow.\n"
]
}
],
"source": [
"from langchain.chains import LLMChain\n",
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"template = \"What is capital of {country}?\"\n",
"\n",
"prompt = PromptTemplate.from_template(template)\n",
"\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"\n",
"generated = llm_chain.invoke(input={\"country\": \"Russia\"})\n",
"print(generated[\"text\"])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -7,7 +7,9 @@
"source": [
"# Astra DB \n",
"\n",
"> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API.\n",
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
"> AI-ready database built on `Apache Cassandra®` and made conveniently availablev\n",
"> through an easy-to-use JSON API.\n",
"\n",
"This notebook goes over how to use Astra DB to store chat message history."
]
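A hedged sketch of the pattern this notebook covers, assuming the `AstraDBChatMessageHistory` class from `langchain-astradb` and credentials taken from the Astra dashboard; the session and collection names are illustrative.

```python
# Hedged sketch: persisting chat history in Astra DB.
# Assumes ASTRA_DB_API_ENDPOINT and ASTRA_DB_APPLICATION_TOKEN are set.
import os

from langchain_astradb import AstraDBChatMessageHistory

history = AstraDBChatMessageHistory(
    session_id="demo-session",
    collection_name="chat_history",
    api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
    token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
)
history.add_user_message("Hello!")
history.add_ai_message("Hi! How can I help?")
print(history.messages)  # HumanMessage / AIMessage objects, in order
```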

View File

@@ -8,17 +8,19 @@
Install the AVS Python SDK and AVS langchain vector store:
```bash
pip install aerospike-vector-search langchain-community
pip install aerospike-vector-search langchain-aerospike
```
See the documentation for the Ptyhon SDK [here](https://aerospike-vector-search-python-client.readthedocs.io/en/latest/index.html).
The documentation for the AVS langchain vector store is [here](https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.aerospike.Aerospike.html).
See the documentation for the Python SDK [here](https://aerospike-vector-search-python-client.readthedocs.io/en/latest/index.html).
The documentation for the AVS langchain vector store is [here](https://langchain-aerospike.readthedocs.io/en/latest/).
## Vector Store
To import this vectorstore:
```python
from langchain_community.vectorstores import Aerospike
from langchain_aerospike.vectorstores import Aerospike
```
See a usage example [here](https://python.langchain.com/docs/integrations/vectorstores/aerospike/).

View File

@@ -1,8 +1,6 @@
# Astra DB
> [DataStax Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless
> vector-capable database built on `Apache Cassandra®`and made conveniently available
> through an easy-to-use JSON API.
> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless AI-ready database built on `Apache Cassandra®` and made conveniently available through an easy-to-use JSON API.
See a [tutorial provided by DataStax](https://docs.datastax.com/en/astra/astra-db-vector/tutorials/chatbot.html).
@@ -10,19 +8,21 @@ See a [tutorial provided by DataStax](https://docs.datastax.com/en/astra/astra-d
Install the following Python package:
```bash
pip install "langchain-astradb>=0.1.0"
pip install "langchain-astradb>=0.6,<0.7"
```
Get the [connection secrets](https://docs.datastax.com/en/astra/astra-db-vector/get-started/quickstart.html).
Set up the following environment variables:
Create a database (if needed) and get the [connection secrets](https://docs.datastax.com/en/astra-db-serverless/get-started/quickstart.html#create-a-database-and-store-your-credentials).
Set the following variables:
```python
ASTRA_DB_APPLICATION_TOKEN="TOKEN"
ASTRA_DB_API_ENDPOINT="API_ENDPOINT"
ASTRA_DB_APPLICATION_TOKEN="TOKEN"
```
## Vector Store
A few typical initialization patterns are shown here:
```python
from langchain_astradb import AstraDBVectorStore
@@ -32,8 +32,56 @@ vector_store = AstraDBVectorStore(
api_endpoint=ASTRA_DB_API_ENDPOINT,
token=ASTRA_DB_APPLICATION_TOKEN,
)
from astrapy.info import VectorServiceOptions
vector_store_vectorize = AstraDBVectorStore(
collection_name="my_vectorize_store",
api_endpoint=ASTRA_DB_API_ENDPOINT,
token=ASTRA_DB_APPLICATION_TOKEN,
collection_vector_service_options=VectorServiceOptions(
provider="nvidia",
model_name="NV-Embed-QA",
),
)
from astrapy.info import (
CollectionLexicalOptions,
CollectionRerankOptions,
RerankServiceOptions,
VectorServiceOptions,
)
vector_store_hybrid = AstraDBVectorStore(
collection_name="my_hybrid_store",
api_endpoint=ASTRA_DB_API_ENDPOINT,
token=ASTRA_DB_APPLICATION_TOKEN,
collection_vector_service_options=VectorServiceOptions(
provider="nvidia",
model_name="NV-Embed-QA",
),
collection_lexical=CollectionLexicalOptions(analyzer="standard"),
collection_rerank=CollectionRerankOptions(
service=RerankServiceOptions(
provider="nvidia",
model_name="nvidia/llama-3.2-nv-rerankqa-1b-v2",
),
),
)
```
Notable features of class `AstraDBVectorStore`:
- native async API;
- metadata filtering in search;
- MMR (maximum marginal relevance) search;
- server-side embedding computation (["vectorize"](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html) in Astra DB parlance);
- auto-detect its settings from an existing, pre-populated Astra DB collection;
- [hybrid search](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html#the-hybrid-search-process) (vector + BM25 and then a rerank step);
- support for non-Astra Data API deployments (e.g. self-hosted [HCD](https://docs.datastax.com/en/hyper-converged-database/1.1/get-started/get-started-hcd.html)).
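To make a few of these features concrete, a hedged sketch against an already-initialized, populated store (the filter key and queries are illustrative):

```python
# Hedged sketch: exercising features listed above on an existing AstraDBVectorStore.
# `vector_store` is assumed to be initialized and populated as shown earlier.

# Metadata filtering in search (the "source" key is illustrative).
docs = vector_store.similarity_search("query text", k=4, filter={"source": "tutorial"})

# MMR (maximum marginal relevance) search for more diverse results.
diverse_docs = vector_store.max_marginal_relevance_search("query text", k=4)


# Native async API.
async def search_async():
    return await vector_store.asimilarity_search("query text", k=4)
```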
Learn more in the [example notebook](/docs/integrations/vectorstores/astradb).
See the [example provided by DataStax](https://docs.datastax.com/en/astra/astra-db-vector/integrations/langchain.html).
@@ -82,8 +130,6 @@ set_llm_cache(AstraDBSemanticCache(
Learn more in the [example notebook](/docs/integrations/llm_caching#astra-db-caches) (scroll to the appropriate section).
Learn more in the [example notebook](/docs/integrations/memory/astradb_chat_message_history).
## Document loader
```python

View File

@@ -0,0 +1,34 @@
# Bright Data
[Bright Data](https://brightdata.com) is a web data platform that provides tools for web scraping, SERP collection, and accessing geo-restricted content.
Bright Data allows developers to extract structured data from websites, perform search engine queries, and access content that might be otherwise blocked or geo-restricted. The platform is designed to help overcome common web scraping challenges including anti-bot systems, CAPTCHAs, and IP blocks.
## Installation and Setup
```bash
pip install langchain-brightdata
```
You'll need to set up your Bright Data API key:
```python
import os
os.environ["BRIGHT_DATA_API_KEY"] = "your-api-key"
```
Or you can pass it directly when initializing tools:
```python
from langchain_brightdata import BrightDataSERP
tool = BrightDataSERP(bright_data_api_key="your-api-key")
```
## Tools
The Bright Data integration provides several tools:
- [BrightDataSERP](/docs/integrations/tools/brightdata_serp) - Search engine results collection with geo-targeting
- [BrightDataUnblocker](/docs/integrations/tools/brightdata_unlocker) - Access ANY public website that might be geo-restricted or bot-protected
- [BrightDataWebScraperAPI](/docs/integrations/tools/brightdata-webscraperapi) - Extract structured data from 100+ popular domains, e.g. Amazon product details and LinkedIn profiles
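A hedged usage sketch for the SERP tool; the assumption that `invoke` accepts a plain query string is illustrative, so check the tool page linked above for the exact argument schema.

```python
# Hedged sketch: invoking the SERP tool with a plain query string (assumed input shape).
from langchain_brightdata import BrightDataSERP

serp_tool = BrightDataSERP(bright_data_api_key="your-api-key")
results = serp_tool.invoke("latest AI research news")
print(results)
```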

View File

@@ -32,7 +32,7 @@ For a detailed walkthrough of this wrapper, see [this notebook](/docs/integratio
You can also load this wrapper as a Tool to use with an Agent:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["dataforseo-api-search"])
```
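As a hedged sketch, the loaded tools can be handed to the prebuilt LangGraph agent used elsewhere in this changeset; the model id is illustrative, and DataForSEO credentials plus an OpenAI key are assumed to be set.

```python
# Hedged sketch: wiring the loaded tools into a prebuilt LangGraph agent.
from langchain_community.agent_toolkits.load_tools import load_tools
from langgraph.prebuilt import create_react_agent

tools = load_tools(["dataforseo-api-search"])
agent = create_react_agent("openai:gpt-4.1-mini", tools)

result = agent.invoke(
    {"messages": [{"role": "user", "content": "Search for the latest SEO news."}]}
)
print(result["messages"][-1].content)
```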

View File

@@ -1,8 +1,8 @@
# Doctran
>[Doctran](https://github.com/psychic-api/doctran) is a python package. It uses LLMs and open-source
> NLP libraries to transform raw text into clean, structured, information-dense documents
> that are optimized for vector space retrieval. You can think of `Doctran` as a black box where
>[Doctran](https://github.com/psychic-api/doctran) is a python package. It uses LLMs and open-source
> NLP libraries to transform raw text into clean, structured, information-dense documents
> that are optimized for vector space retrieval. You can think of `Doctran` as a black box where
> messy strings go in and nice, clean, labelled strings come out.
@@ -19,19 +19,19 @@ pip install doctran
See a [usage example for DoctranQATransformer](/docs/integrations/document_transformers/doctran_interrogate_document).
```python
from langchain_community.document_loaders import DoctranQATransformer
from langchain_community.document_transformers import DoctranQATransformer
```
### Property Extractor
See a [usage example for DoctranPropertyExtractor](/docs/integrations/document_transformers/doctran_extract_properties).
```python
from langchain_community.document_loaders import DoctranPropertyExtractor
from langchain_community.document_transformers import DoctranPropertyExtractor
```
### Document Translator
See a [usage example for DoctranTextTranslator](/docs/integrations/document_transformers/doctran_translate_document).
```python
from langchain_community.document_loaders import DoctranTextTranslator
from langchain_community.document_transformers import DoctranTextTranslator
```

File diff suppressed because it is too large

View File

@@ -0,0 +1,12 @@
# Featherless AI
[Featherless AI](https://featherless.ai/) is a serverless AI inference platform that offers access to over 4,300 open-source models. Our goal is to make all AI models available for serverless inference. We provide inference via API to a continually expanding library of open-weight models.
## Installation and Setup
`pip install langchain-featherless-ai`
1. Sign up for an account at [Featherless](https://featherless.ai/register)
2. Subscribe to a plan and get your API key from [API Keys](https://featherless.ai/account/api-keys)
3. Set up your API key as an environment variable (`FEATHERLESSAI_API_KEY`)
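A minimal quick-start sketch once the key is set; the model name mirrors the chat notebook above and is illustrative.

```python
# Hedged sketch: basic chat completion with Featherless AI.
import os

from langchain_featherless_ai import ChatFeatherlessAi

os.environ.setdefault("FEATHERLESSAI_API_KEY", "your-api-key")

llm = ChatFeatherlessAi(model="featherless-ai/Qwerky-72B", temperature=0.7)
print(llm.invoke("Say hello in French.").content)
```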
## Model catalog
Visit our model catalog for an overview of all our models: https://featherless.ai/models

View File

@@ -1,6 +1,6 @@
# Friendli AI
> [FriendliAI](https://friendli.ai/) enhances AI application performance and optimizes
> [FriendliAI](https://friendli.ai/) enhances AI application performance and optimizes
> cost savings with scalable, efficient deployment options, tailored for high-demand AI workloads.
## Installation and setup
@@ -11,8 +11,8 @@ Install the `friendli-client` python package.
pip install -U langchain_community friendli-client
```
Sign in to [Friendli Suite](https://suite.friendli.ai/) to create a Personal Access Token,
and set it as the `FRIENDLI_TOKEN` environment variabzle.
Sign in to [Friendli Suite](https://suite.friendli.ai/) to create a Personal Access Token,
and set it as the `FRIENDLI_TOKEN` environment variable.
## Chat models

View File

@@ -0,0 +1,56 @@
# Gel
[Gel](https://www.geldata.com/) is a powerful data platform built on top of PostgreSQL.
- Think in objects and graphs instead of tables and JOINs.
- Use the advanced Python SDK, integrated GUI, migrations engine, Auth and AI layers, and much more.
- Run locally, remotely, or in a [fully managed cloud](https://www.geldata.com/cloud).
## Installation
```bash
pip install langchain-gel
```
## Setup
1. Run `gel project init`
2. Edit the schema. You need the following types to use the LangChain vectorstore:
```gel
using extension pgvector;
module default {
scalar type EmbeddingVector extending ext::pgvector::vector<1536>;
type Record {
required collection: str;
text: str;
embedding: EmbeddingVector;
external_id: str {
constraint exclusive;
};
metadata: json;
index ext::pgvector::hnsw_cosine(m := 16, ef_construction := 128)
on (.embedding)
}
}
```
> Note: this is the minimal setup. Feel free to add as many types, properties and links as you want!
> Learn more about taking advantage of Gel's schema by reading the [docs](https://docs.geldata.com/learn/schema).
3. Run the migration: `gel migration create && gel migrate`.
## Usage
```python
from langchain_gel import GelVectorStore
vector_store = GelVectorStore(
embeddings=embeddings,
)
```
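A slightly fuller, hedged sketch using the standard LangChain vector store interface; the OpenAI embedding model is an assumption, and any `Embeddings` implementation should work.

```python
# Hedged sketch: adding documents and searching with GelVectorStore.
# Assumes the schema and migration steps above have been run and an OpenAI key is set.
from langchain_core.documents import Document
from langchain_gel import GelVectorStore
from langchain_openai import OpenAIEmbeddings

vector_store = GelVectorStore(embeddings=OpenAIEmbeddings())

vector_store.add_documents(
    [
        Document(page_content="Gel is built on top of PostgreSQL."),
        Document(page_content="pgvector powers the embedding index."),
    ]
)

results = vector_store.similarity_search("What is Gel built on?", k=1)
print(results[0].page_content)
```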
See the full usage example [here](/docs/integrations/vectorstores/gel).

View File

@@ -27,7 +27,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ
You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["golden-query"])
```

View File

@@ -880,7 +880,7 @@ from langchain_community.tools import GoogleSearchRun, GoogleSearchResults
Agent Loading:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["google-search"])
```
@@ -1313,7 +1313,7 @@ from langchain_community.tools import GoogleSearchRun, GoogleSearchResults
Agent Loading:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["google-search"])
```

View File

@@ -67,7 +67,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ
You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["google-serper"])
```

View File

@@ -1,6 +1,6 @@
# Groq
>[Groq](https://groq.com)developed the world's first Language Processing Unit™, or `LPU`.
>[Groq](https://groq.com) developed the world's first Language Processing Unit™, or `LPU`.
> The `Groq LPU` has a deterministic, single core streaming architecture that sets the standard
> for GenAI inference speed with predictable and repeatable performance for any given workload.
>

View File

@@ -21,13 +21,3 @@ To import this vectorstore:
from langchain_milvus import Milvus
```
## Retrievers
See a [usage example](/docs/integrations/retrievers/milvus_hybrid_search).
To import this vectorstore:
```python
from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever
from langchain_milvus.utils.sparse import BM25SparseEmbedding
```

View File

@@ -37,8 +37,12 @@ You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
tools = load_tools(["openweathermap-api"])
import os
from langchain_community.utilities import OpenWeatherMapAPIWrapper
os.environ["OPENWEATHERMAP_API_KEY"] = ""
weather = OpenWeatherMapAPIWrapper()
tools = [weather.run]
```
For more information on tools, see [this page](/docs/how_to/tools_builtin).
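As a hedged follow-up to the snippet above, a direct call to the wrapper (requires the `pyowm` package; the location string is illustrative):

```python
import os

from langchain_community.utilities import OpenWeatherMapAPIWrapper

os.environ["OPENWEATHERMAP_API_KEY"] = "your-api-key"

weather = OpenWeatherMapAPIWrapper()
# Returns a human-readable summary of current conditions for the location.
print(weather.run("London,GB"))
```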

View File

@@ -29,11 +29,44 @@ For a more detailed walkthrough of the Pinecone vectorstore, see [this notebook]
### Sparse Vector store
LangChain's `PineconeSparseVectorStore` enables sparse retrieval using Pinecone's sparse English model. It maps text to sparse vectors and supports adding documents and similarity search.
```python
from langchain_pinecone import PineconeSparseVectorStore
# Initialize sparse vector store
vector_store = PineconeSparseVectorStore(
index=my_index,
embedding_model="pinecone-sparse-english-v0"
)
# Add documents
vector_store.add_documents(documents)
# Query
results = vector_store.similarity_search("your query", k=3)
```
For a more detailed walkthrough of the Pinecone vectorstore, see [this notebook](/docs/integrations/vectorstores/pinecone_sparse)
For a more detailed walkthrough, see the [Pinecone Sparse Vector Store notebook](/docs/integrations/vectorstores/pinecone_sparse).
### Sparse Embedding
LangChain's `PineconeSparseEmbeddings` provides sparse embedding generation using Pinecone's `pinecone-sparse-english-v0` model.
```python
from langchain_pinecone.embeddings import PineconeSparseEmbeddings
# Initialize sparse embeddings
sparse_embeddings = PineconeSparseEmbeddings(
model="pinecone-sparse-english-v0"
)
# Embed a single query (returns SparseValues)
query_embedding = sparse_embeddings.embed_query("sample text")
# Embed multiple documents (returns list of SparseValues)
docs = ["Document 1 content", "Document 2 content"]
doc_embeddings = sparse_embeddings.embed_documents(docs)
```
For more detailed usage, see the [Pinecone Sparse Embeddings notebook](/docs/integrations/vectorstores/pinecone_sparse).
## Retrievers

View File

@@ -1,37 +0,0 @@
# Salute Devices
Salute Devices provides GigaChat LLM's models.
For more info how to get access to GigaChat [follow here](https://developers.sber.ru/docs/ru/gigachat/api/integration).
## Installation and Setup
GigaChat package can be installed via pip from PyPI:
```bash
pip install langchain-gigachat
```
## LLMs
See a [usage example](/docs/integrations/llms/gigachat).
```python
from langchain_community.llms import GigaChat
```
## Chat models
See a [usage example](/docs/integrations/chat/gigachat).
```python
from langchain_gigachat.chat_models import GigaChat
```
## Embeddings
See a [usage example](/docs/integrations/text_embedding/gigachat).
```python
from langchain_gigachat.embeddings import GigaChatEmbeddings
```

View File

@@ -73,7 +73,7 @@ You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["searchapi"])
```

View File

@@ -52,7 +52,7 @@ You can also load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["searx-search"],
searx_host="http://localhost:8888",
engines=["github"])

View File

@@ -24,7 +24,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ
You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["serpapi"])
```

View File

@@ -29,7 +29,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ
You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["stackexchange"])
```

View File

@@ -315,6 +315,17 @@
"Vectara offers Intelligent Query Rewriting option which enhances search precision by automatically generating metadata filter expressions from natural language queries. This capability analyzes user queries, extracts relevant metadata filters, and rephrases the query to focus on the core information need. For more details [go to this notebook](../retrievers/self_query/vectara_self_query.ipynb)."
]
},
{
"cell_type": "markdown",
"source": [
"## Vectara tools\n",
"Vectara provides serval tools that can be used with Langchain. For more details [go to this notebook](../tools/vectara.ipynb)"
],
"metadata": {
"collapsed": false
},
"id": "beadf6f485c1a69"
},
{
"cell_type": "code",
"execution_count": null,

View File

@@ -32,7 +32,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ
You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["wolfram-alpha"])
```

View File

@@ -365,7 +365,7 @@
],
"source": [
"from langchain.retrievers import ContextualCompressionRetriever\n",
"from langchain.retrievers.document_compressors import FlashrankRerank\n",
"from langchain_community.document_compressors import FlashrankRerank\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(temperature=0)\n",

View File

@@ -1,639 +0,0 @@
{
"cells": [
{
"cell_type": "raw",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Milvus Hybrid Search\n",
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Milvus Hybrid Search Retriever\n",
"\n",
"> [Milvus](https://milvus.io/docs) is an open-source vector database built to power embedding similarity search and AI applications. Milvus makes unstructured data search more accessible, and provides a consistent user experience regardless of the deployment environment.\n",
"\n",
"This will help you getting started with the Milvus Hybrid Search [retriever](/docs/concepts/retrievers), which combines the strengths of both dense and sparse vector search. For detailed documentation of all `MilvusCollectionHybridSearchRetriever` features and configurations head to the [API reference](https://python.langchain.com/api_reference/milvus/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html).\n",
"\n",
"See also the Milvus Multi-Vector Search [docs](https://milvus.io/docs/multi-vector-search.md).\n",
"\n",
"### Integration details\n",
"\n",
"import {ItemTable} from \"@theme/FeatureTables\";\n",
"\n",
"<ItemTable category=\"document_retrievers\" item=\"MilvusCollectionHybridSearchRetriever\" />\n",
"\n",
"## Setup\n",
"\n",
"If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Installation\n",
"\n",
"This retriever lives in the `langchain-milvus` package. This guide requires the following dependencies:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"%pip install --upgrade --quiet pymilvus[model] langchain-milvus langchain-openai"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import PromptTemplate\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever\n",
"from langchain_milvus.utils.sparse import BM25SparseEmbedding\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from pymilvus import (\n",
" Collection,\n",
" CollectionSchema,\n",
" DataType,\n",
" FieldSchema,\n",
" WeightedRanker,\n",
" connections,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Start the Milvus service\n",
"\n",
"Please refer to the [Milvus documentation](https://milvus.io/docs/install_standalone-docker.md) to start the Milvus service.\n",
"\n",
"After starting milvus, you need to specify your milvus connection URI."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"CONNECTION_URI = \"http://localhost:19530\""
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Prepare OpenAI API Key\n",
"\n",
"Please refer to the [OpenAI documentation](https://platform.openai.com/account/api-keys) to obtain your OpenAI API key, and set it as an environment variable.\n",
"\n",
"```shell\n",
"export OPENAI_API_KEY=<your_api_key>\n",
"```\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Prepare dense and sparse embedding functions\n",
"\n",
"Let us fictionalize 10 fake descriptions of novels. In actual production, it may be a large amount of text data."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"texts = [\n",
" \"In 'The Whispering Walls' by Ava Moreno, a young journalist named Sophia uncovers a decades-old conspiracy hidden within the crumbling walls of an ancient mansion, where the whispers of the past threaten to destroy her own sanity.\",\n",
" \"In 'The Last Refuge' by Ethan Blackwood, a group of survivors must band together to escape a post-apocalyptic wasteland, where the last remnants of humanity cling to life in a desperate bid for survival.\",\n",
" \"In 'The Memory Thief' by Lila Rose, a charismatic thief with the ability to steal and manipulate memories is hired by a mysterious client to pull off a daring heist, but soon finds themselves trapped in a web of deceit and betrayal.\",\n",
" \"In 'The City of Echoes' by Julian Saint Clair, a brilliant detective must navigate a labyrinthine metropolis where time is currency, and the rich can live forever, but at a terrible cost to the poor.\",\n",
" \"In 'The Starlight Serenade' by Ruby Flynn, a shy astronomer discovers a mysterious melody emanating from a distant star, which leads her on a journey to uncover the secrets of the universe and her own heart.\",\n",
" \"In 'The Shadow Weaver' by Piper Redding, a young orphan discovers she has the ability to weave powerful illusions, but soon finds herself at the center of a deadly game of cat and mouse between rival factions vying for control of the mystical arts.\",\n",
" \"In 'The Lost Expedition' by Caspian Grey, a team of explorers ventures into the heart of the Amazon rainforest in search of a lost city, but soon finds themselves hunted by a ruthless treasure hunter and the treacherous jungle itself.\",\n",
" \"In 'The Clockwork Kingdom' by Augusta Wynter, a brilliant inventor discovers a hidden world of clockwork machines and ancient magic, where a rebellion is brewing against the tyrannical ruler of the land.\",\n",
" \"In 'The Phantom Pilgrim' by Rowan Welles, a charismatic smuggler is hired by a mysterious organization to transport a valuable artifact across a war-torn continent, but soon finds themselves pursued by deadly assassins and rival factions.\",\n",
" \"In 'The Dreamwalker's Journey' by Lyra Snow, a young dreamwalker discovers she has the ability to enter people's dreams, but soon finds herself trapped in a surreal world of nightmares and illusions, where the boundaries between reality and fantasy blur.\",\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We will use the [OpenAI Embedding](https://platform.openai.com/docs/guides/embeddings) to generate dense vectors, and the [BM25 algorithm](https://en.wikipedia.org/wiki/Okapi_BM25) to generate sparse vectors.\n",
"\n",
"Initialize dense embedding function and get dimension"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1536"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dense_embedding_func = OpenAIEmbeddings()\n",
"dense_dim = len(dense_embedding_func.embed_query(texts[1]))\n",
"dense_dim"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Initialize sparse embedding function.\n",
"\n",
"Note that the output of sparse embedding is a set of sparse vectors, which represents the index and weight of the keywords of the input text."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0: 0.4270424944042204,\n",
" 21: 1.845826690498331,\n",
" 22: 1.845826690498331,\n",
" 23: 1.845826690498331,\n",
" 24: 1.845826690498331,\n",
" 25: 1.845826690498331,\n",
" 26: 1.845826690498331,\n",
" 27: 1.2237754316221157,\n",
" 28: 1.845826690498331,\n",
" 29: 1.845826690498331,\n",
" 30: 1.845826690498331,\n",
" 31: 1.845826690498331,\n",
" 32: 1.845826690498331,\n",
" 33: 1.845826690498331,\n",
" 34: 1.845826690498331,\n",
" 35: 1.845826690498331,\n",
" 36: 1.845826690498331,\n",
" 37: 1.845826690498331,\n",
" 38: 1.845826690498331,\n",
" 39: 1.845826690498331}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sparse_embedding_func = BM25SparseEmbedding(corpus=texts)\n",
"sparse_embedding_func.embed_query(texts[1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Milvus Collection and load data\n",
"\n",
"Initialize connection URI and establish connection"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"connections.connect(uri=CONNECTION_URI)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define field names and their data types"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"pk_field = \"doc_id\"\n",
"dense_field = \"dense_vector\"\n",
"sparse_field = \"sparse_vector\"\n",
"text_field = \"text\"\n",
"fields = [\n",
" FieldSchema(\n",
" name=pk_field,\n",
" dtype=DataType.VARCHAR,\n",
" is_primary=True,\n",
" auto_id=True,\n",
" max_length=100,\n",
" ),\n",
" FieldSchema(name=dense_field, dtype=DataType.FLOAT_VECTOR, dim=dense_dim),\n",
" FieldSchema(name=sparse_field, dtype=DataType.SPARSE_FLOAT_VECTOR),\n",
" FieldSchema(name=text_field, dtype=DataType.VARCHAR, max_length=65_535),\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a collection with the defined schema"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"schema = CollectionSchema(fields=fields, enable_dynamic_field=False)\n",
"collection = Collection(\n",
" name=\"IntroductionToTheNovels\", schema=schema, consistency_level=\"Strong\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define index for dense and sparse vectors"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"dense_index = {\"index_type\": \"FLAT\", \"metric_type\": \"IP\"}\n",
"collection.create_index(\"dense_vector\", dense_index)\n",
"sparse_index = {\"index_type\": \"SPARSE_INVERTED_INDEX\", \"metric_type\": \"IP\"}\n",
"collection.create_index(\"sparse_vector\", sparse_index)\n",
"collection.flush()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Insert entities into the collection and load the collection"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"entities = []\n",
"for text in texts:\n",
" entity = {\n",
" dense_field: dense_embedding_func.embed_documents([text])[0],\n",
" sparse_field: sparse_embedding_func.embed_documents([text])[0],\n",
" text_field: text,\n",
" }\n",
" entities.append(entity)\n",
"collection.insert(entities)\n",
"collection.load()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our retriever, defining search parameters for sparse and dense fields:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sparse_search_params = {\"metric_type\": \"IP\"}\n",
"dense_search_params = {\"metric_type\": \"IP\", \"params\": {}}\n",
"retriever = MilvusCollectionHybridSearchRetriever(\n",
" collection=collection,\n",
" rerank=WeightedRanker(0.5, 0.5),\n",
" anns_fields=[dense_field, sparse_field],\n",
" field_embeddings=[dense_embedding_func, sparse_embedding_func],\n",
" field_search_params=[dense_search_params, sparse_search_params],\n",
" top_k=3,\n",
" text_field=text_field,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"In the input parameters of this Retriever, we use a dense embedding and a sparse embedding to perform hybrid search on the two fields of this Collection, and use WeightedRanker for reranking. Finally, 3 top-K Documents will be returned."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Usage"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content=\"In 'The Lost Expedition' by Caspian Grey, a team of explorers ventures into the heart of the Amazon rainforest in search of a lost city, but soon finds themselves hunted by a ruthless treasure hunter and the treacherous jungle itself.\", metadata={'doc_id': '449281835035545843'}),\n",
" Document(page_content=\"In 'The Phantom Pilgrim' by Rowan Welles, a charismatic smuggler is hired by a mysterious organization to transport a valuable artifact across a war-torn continent, but soon finds themselves pursued by deadly assassins and rival factions.\", metadata={'doc_id': '449281835035545845'}),\n",
" Document(page_content=\"In 'The Dreamwalker's Journey' by Lyra Snow, a young dreamwalker discovers she has the ability to enter people's dreams, but soon finds herself trapped in a surreal world of nightmares and illusions, where the boundaries between reality and fantasy blur.\", metadata={'doc_id': '449281835035545846'})]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.invoke(\"What are the story about ventures?\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use within a chain\n",
"\n",
"Initialize ChatOpenAI and define a prompt template"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"llm = ChatOpenAI()\n",
"\n",
"PROMPT_TEMPLATE = \"\"\"\n",
"Human: You are an AI assistant, and provides answers to questions by using fact based and statistical information when possible.\n",
"Use the following pieces of information to provide a concise answer to the question enclosed in <question> tags.\n",
"\n",
"<context>\n",
"{context}\n",
"</context>\n",
"\n",
"<question>\n",
"{question}\n",
"</question>\n",
"\n",
"Assistant:\"\"\"\n",
"\n",
"prompt = PromptTemplate(\n",
" template=PROMPT_TEMPLATE, input_variables=[\"context\", \"question\"]\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"Define a function for formatting documents"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def format_docs(docs):\n",
" return \"\\n\\n\".join(doc.page_content for doc in docs)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"Define a chain using the retriever and other components"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"rag_chain = (\n",
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
" | prompt\n",
" | llm\n",
" | StrOutputParser()\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"Perform a query using the defined chain"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"\"Lila Rose has written 'The Memory Thief,' which follows a charismatic thief with the ability to steal and manipulate memories as they navigate a daring heist and a web of deceit and betrayal.\""
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rag_chain.invoke(\"What novels has Lila written and what are their contents?\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"Drop the collection"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"collection.drop()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `MilvusCollectionHybridSearchRetriever` features and configurations head to the [API reference](https://python.langchain.com/api_reference/milvus/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html)."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -0,0 +1,322 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7fb27b941602401d91542211134fc71a",
"metadata": {},
"source": [
"# Pinecone Rerank\n",
"\n",
"> This notebook shows how to use **PineconeRerank** for two-stage vector retrieval reranking using Pinecone's hosted reranking API as demonstrated in `langchain_pinecone/libs/pinecone/rerank.py`."
]
},
{
"cell_type": "markdown",
"id": "acae54e37e7d407bbb7b55eff062a284",
"metadata": {},
"source": [
"## Setup\n",
"Install the `langchain-pinecone` package."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a63283cbaf04dbcab1f6479b197f3a8",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU \"langchain-pinecone\""
]
},
{
"cell_type": "markdown",
"id": "8dd0d8092fe74a7c96281538738b07e2",
"metadata": {},
"source": [
"## Credentials\n",
"Set your Pinecone API key to use the reranking API."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72eea5119410473aa328ad9291626812",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"PINECONE_API_KEY\"] = os.getenv(\"PINECONE_API_KEY\") or getpass(\n",
" \"Enter your Pinecone API key: \"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "8edb47106e1a46a883d545849b8ab81b",
"metadata": {},
"source": [
"## Instantiation\n",
"Use `PineconeRerank` to rerank a list of documents by relevance to a query."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "10185d26023b46108eb7d9f57d49d2b3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/jakit/customers/aurelio/langchain-pinecone/libs/pinecone/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Score: 0.9998 | Content: Paris is the capital of France.\n",
"Score: 0.1950 | Content: The Eiffel Tower is in Paris.\n",
"Score: 0.0042 | Content: Berlin is the capital of Germany.\n"
]
}
],
"source": [
"from langchain_core.documents import Document\n",
"from langchain_pinecone import PineconeRerank\n",
"\n",
"# Initialize reranker\n",
"reranker = PineconeRerank(model=\"bge-reranker-v2-m3\")\n",
"\n",
"# Sample documents\n",
"documents = [\n",
" Document(page_content=\"Paris is the capital of France.\"),\n",
" Document(page_content=\"Berlin is the capital of Germany.\"),\n",
" Document(page_content=\"The Eiffel Tower is in Paris.\"),\n",
"]\n",
"\n",
"# Rerank documents\n",
"query = \"What is the capital of France?\"\n",
"reranked_docs = reranker.compress_documents(documents, query)\n",
"\n",
"# Print results\n",
"for doc in reranked_docs:\n",
" score = doc.metadata.get(\"relevance_score\")\n",
" print(f\"Score: {score:.4f} | Content: {doc.page_content}\")"
]
},
{
"cell_type": "markdown",
"id": "8763a12b2bbd4a93a75aff182afb95dc",
"metadata": {},
"source": [
"## Usage\n",
"### Reranking with Top-N\n",
"Specify `top_n` to limit the number of returned documents."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7623eae2785240b9bd12b16a66d81610",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top-1 Result:\n",
"Score: 0.9998 | Content: Paris is the capital of France.\n"
]
}
],
"source": [
"# Return only top-1 result\n",
"reranker_top1 = PineconeRerank(model=\"bge-reranker-v2-m3\", top_n=1)\n",
"top1_docs = reranker_top1.compress_documents(documents, query)\n",
"print(\"Top-1 Result:\")\n",
"for doc in top1_docs:\n",
" print(f\"Score: {doc.metadata['relevance_score']:.4f} | Content: {doc.page_content}\")"
]
},
{
"cell_type": "markdown",
"id": "7cdc8c89c7104fffa095e18ddfef8986",
"metadata": {},
"source": [
"## Reranking with Custom Rank Fields\n",
"If your documents are dictionaries or have custom fields, use `rank_fields` to specify the field to rank on."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b118ea5561624da68c537baed56e602f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ID: doc3 | Score: 0.9892\n",
"ID: doc1 | Score: 0.0006\n",
"ID: doc2 | Score: 0.0000\n"
]
}
],
"source": [
"# Sample dictionary documents with 'text' field\n",
"docs_dict = [\n",
" {\n",
" \"id\": \"doc1\",\n",
" \"text\": \"Article about renewable energy.\",\n",
" \"title\": \"Renewable Energy\",\n",
" },\n",
" {\"id\": \"doc2\", \"text\": \"Report on economic growth.\", \"title\": \"Economic Growth\"},\n",
" {\n",
" \"id\": \"doc3\",\n",
" \"text\": \"News on climate policy changes.\",\n",
" \"title\": \"Climate Policy\",\n",
" },\n",
"]\n",
"\n",
"# Initialize reranker with rank_fields\n",
"reranker_text = PineconeRerank(model=\"bge-reranker-v2-m3\", rank_fields=[\"text\"])\n",
"climate_docs = reranker_text.rerank(docs_dict, \"Latest news on climate change.\")\n",
"\n",
"# Show IDs and scores\n",
"for res in climate_docs:\n",
" print(f\"ID: {res['id']} | Score: {res['score']:.4f}\")"
]
},
{
"cell_type": "markdown",
"id": "a80bb6c3",
"metadata": {},
"source": [
"We can rerank based on title field"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a6f2768e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ID: doc2 | Score: 0.8918 | Title: Economic Growth\n",
"ID: doc3 | Score: 0.0002 | Title: Climate Policy\n",
"ID: doc1 | Score: 0.0000 | Title: Renewable Energy\n"
]
}
],
"source": [
"economic_docs = reranker_text.rerank(docs_dict, \"Economic forecast.\")\n",
"\n",
"# Show IDs and scores\n",
"for res in economic_docs:\n",
" print(\n",
" f\"ID: {res['id']} | Score: {res['score']:.4f} | Title: {res['document']['title']}\"\n",
" )"
]
},
{
"cell_type": "markdown",
"id": "938c804e27f84196a10c8828c723f798",
"metadata": {},
"source": [
"## Reranking with Additional Parameters\n",
"You can pass model-specific parameters (e.g., `truncate`) directly to `.rerank()`."
]
},
{
"cell_type": "markdown",
"id": "a94c501c",
"metadata": {},
"source": [
"How to handle inputs longer than those supported by the model. Accepted values: END or NONE.\n",
"END truncates the input sequence at the input token limit. NONE returns an error when the input exceeds the input token limit."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "504fb2a444614c0babb325280ed9130a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ID: docA | Score: 0.6950\n",
"ID: docB | Score: 0.0001\n"
]
}
],
"source": [
"# Rerank with custom truncate parameter\n",
"docs_simple = [\n",
" {\"id\": \"docA\", \"text\": \"Quantum entanglement is a physical phenomenon...\"},\n",
" {\"id\": \"docB\", \"text\": \"Classical mechanics describes motion...\"},\n",
"]\n",
"\n",
"reranked = reranker.rerank(\n",
" documents=docs_simple,\n",
" query=\"Explain the concept of quantum entanglement.\",\n",
" truncate=\"END\",\n",
")\n",
"# Print reranked IDs and scores\n",
"for res in reranked:\n",
" print(f\"ID: {res['id']} | Score: {res['score']:.4f}\")"
]
},
{
"cell_type": "markdown",
"id": "ab78bcd8",
"metadata": {},
"source": [
"## Use within a chain"
]
},
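{
"cell_type": "markdown",
"id": "chain-usage-lead-in",
"metadata": {},
"source": [
"The cell below is a minimal sketch of using the reranker inside a retrieval chain. It assumes you already have a `base_retriever` (for example, one returned by a Pinecone vector store's `as_retriever()`), and wraps `PineconeRerank` in a `ContextualCompressionRetriever` so retrieved documents are reranked before being passed downstream."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "chain-usage-sketch",
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers import ContextualCompressionRetriever\n",
"\n",
"# Assumption: `base_retriever` is an existing retriever, e.g. from a Pinecone vector store\n",
"reranker = PineconeRerank(model=\"bge-reranker-v2-m3\", top_n=3)\n",
"\n",
"compression_retriever = ContextualCompressionRetriever(\n",
"    base_compressor=reranker,\n",
"    base_retriever=base_retriever,\n",
")\n",
"\n",
"reranked_docs = compression_retriever.invoke(\"What is the capital of France?\")\n",
"for doc in reranked_docs:\n",
"    print(doc.metadata.get(\"relevance_score\"), doc.page_content)"
]
},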
{
"cell_type": "markdown",
"id": "59bbdb311c014d738909a11f9e486628",
"metadata": {},
"source": [
"## API reference\n",
"- `PineconeRerank(model, top_n, rank_fields, return_documents)`\n",
"- `.rerank(documents, query, rank_fields=None, model=None, top_n=None, truncate=\"END\")`\n",
"- `.compress_documents(documents, query)` (returns `Document` objects with `relevance_score` in metadata)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -6,7 +6,9 @@
"source": [
"# Astra DB\n",
"\n",
">[DataStax Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on `Cassandra` and made conveniently available through an easy-to-use JSON API.\n",
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
"> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
"> through an easy-to-use JSON API.\n",
"\n",
"In the walkthrough, we'll demo the `SelfQueryRetriever` with an `Astra DB` vector store."
]

View File

@@ -23,7 +23,9 @@
"\n",
"## Overview\n",
"\n",
"DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API.\n",
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
"> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
"> through an easy-to-use JSON API.\n",
"\n",
"### Integration details\n",
"\n",

View File

@@ -12,24 +12,36 @@
"\n",
">[Cloudflare Workers AI](https://developers.cloudflare.com/workers-ai/) allows you to run machine learning models, on the `Cloudflare` network, from your code via REST API.\n",
"\n",
">[Cloudflare AI document](https://developers.cloudflare.com/workers-ai/models/text-embeddings/) listed all text embeddings models available.\n",
">[Workers AI Developer Docs](https://developers.cloudflare.com/workers-ai/models/text-embeddings/) lists all text embeddings models available.\n",
"\n",
"## Setting up\n",
"\n",
"Both Cloudflare account ID and API token are required. Find how to obtain them from [this document](https://developers.cloudflare.com/workers-ai/get-started/rest-api/).\n"
"Both a Cloudflare Account ID and Workers AI API token are required. Find how to obtain them from [this document](https://developers.cloudflare.com/workers-ai/get-started/rest-api/).\n",
"\n",
"You can pass these parameters explicitly or define as environmental variables.\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 11,
"id": "f60023b8",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-13T06:00:30.121204Z",
"start_time": "2025-05-13T06:00:30.117936Z"
}
},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"my_account_id = getpass.getpass(\"Enter your Cloudflare account ID:\\n\\n\")\n",
"my_api_token = getpass.getpass(\"Enter your Cloudflare API token:\\n\\n\")"
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv(\".env\")\n",
"\n",
"cf_acct_id = os.getenv(\"CF_ACCOUNT_ID\")\n",
"\n",
"cf_ai_token = os.getenv(\"CF_AI_API_TOKEN\")"
]
},
{
@@ -42,9 +54,14 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 12,
"id": "92c5b61e",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-13T06:00:31.224996Z",
"start_time": "2025-05-13T06:00:31.222981Z"
}
},
"outputs": [],
"source": [
"from langchain_cloudflare.embeddings import (\n",
@@ -54,25 +71,28 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 13,
"id": "062547b9",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-13T06:00:32.515031Z",
"start_time": "2025-05-13T06:00:31.798590Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(384, [-0.033627357333898544, 0.03982774540781975, 0.03559349477291107])"
]
"text/plain": "(384, [-0.033660888671875, 0.039764404296875, 0.03558349609375])"
},
"execution_count": 3,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"embeddings = CloudflareWorkersAIEmbeddings(\n",
" account_id=my_account_id,\n",
" api_token=my_api_token,\n",
" account_id=cf_acct_id,\n",
" api_token=cf_ai_token,\n",
" model_name=\"@cf/baai/bge-small-en-v1.5\",\n",
")\n",
"# single string embeddings\n",
@@ -82,17 +102,20 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 14,
"id": "e1dcc4bd",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-13T06:00:33.106160Z",
"start_time": "2025-05-13T06:00:32.847232Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(3, 384)"
]
"text/plain": "(3, 384)"
},
"execution_count": 4,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -102,14 +125,6 @@
"batch_query_result = embeddings.embed_documents([\"test1\", \"test2\", \"test3\"])\n",
"len(batch_query_result), len(batch_query_result[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52de8b88",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

View File

@@ -1,120 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# GigaChat\n",
"This notebook shows how to use LangChain with [GigaChat embeddings](https://developers.sber.ru/portal/products/gigachat).\n",
"To use you need to install ```gigachat``` python package."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"is_executing": true
}
},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-gigachat"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"To get GigaChat credentials you need to [create account](https://developers.sber.ru/studio/login) and [get access to API](https://developers.sber.ru/docs/ru/gigachat/individuals-quickstart)\n",
"\n",
"## Example"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"if \"GIGACHAT_CREDENTIALS\" not in os.environ:\n",
" os.environ[\"GIGACHAT_CREDENTIALS\"] = getpass()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from langchain_gigachat import GigaChatEmbeddings\n",
"\n",
"embeddings = GigaChatEmbeddings(verify_ssl_certs=False, scope=\"GIGACHAT_API_PERS\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"query_result = embeddings.embed_query(\"The quick brown fox jumps over the lazy dog\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": "[0.8398333191871643,\n -0.14180311560630798,\n -0.6161925792694092,\n -0.17103666067123413,\n 1.2884578704833984]"
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_result[:5]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,292 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# BrightDataWebScraperAPI"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[Bright Data](https://brightdata.com/) provides a powerful Web Scraper API that allows you to extract structured data from 100+ ppular domains, including Amazon product details, LinkedIn profiles, and more, making it particularly useful for AI agents requiring reliable structured web data feeds."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Overview"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Integration details\n",
"\n",
"|Class|Package|Serializable|JS support|Package latest|\n",
"|:--|:--|:-:|:-:|:-:|\n",
"|[BrightDataWebScraperAPI](https://pypi.org/project/langchain-brightdata/)|[langchain-brightdata](https://pypi.org/project/langchain-brightdata/)|✅|❌|![PyPI - Version](https://img.shields.io/pypi/v/langchain-brightdata?style=flat-square&label=%20)|\n",
"\n",
"### Tool features\n",
"\n",
"|Native async|Returns artifact|Return data|Pricing|\n",
"|:-:|:-:|:--|:-:|\n",
"|❌|❌|Structured data from websites (Amazon products, LinkedIn profiles, etc.)|Requires Bright Data account|\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"The integration lives in the `langchain-brightdata` package.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"pip install langchain-brightdata"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You'll need a Bright Data API key to use this tool. You can set it as an environment variable:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"BRIGHT_DATA_API_KEY\"] = \"your-api-key\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or pass it directly when initializing the tool:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataWebScraperAPI\n",
"\n",
"scraper_tool = BrightDataWebScraperAPI(bright_data_api_key=\"your-api-key\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Here we show how to instantiate an instance of the BrightDataWebScraperAPI tool. This tool allows you to extract structured data from various websites including Amazon product details, LinkedIn profiles, and more using Bright Data's Dataset API.\n",
"\n",
"The tool accepts various parameters during instantiation:\n",
"\n",
"- `bright_data_api_key` (required, str): Your Bright Data API key for authentication.\n",
"- `dataset_mapping` (optional, Dict[str, str]): A dictionary mapping dataset types to their corresponding Bright Data dataset IDs. The default mapping includes:\n",
" - \"amazon_product\": \"gd_l7q7dkf244hwjntr0\"\n",
" - \"amazon_product_reviews\": \"gd_le8e811kzy4ggddlq\"\n",
" - \"linkedin_person_profile\": \"gd_l1viktl72bvl7bjuj0\"\n",
" - \"linkedin_company_profile\": \"gd_l1vikfnt1wgvvqz95w\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Invocation\n",
"\n",
"### Basic Usage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataWebScraperAPI\n",
"\n",
"# Initialize the tool\n",
"scraper_tool = BrightDataWebScraperAPI(\n",
" bright_data_api_key=\"your-api-key\" # Optional if set in environment variables\n",
")\n",
"\n",
"# Extract Amazon product data\n",
"results = scraper_tool.invoke(\n",
" {\"url\": \"https://www.amazon.com/dp/B08L5TNJHG\", \"dataset_type\": \"amazon_product\"}\n",
")\n",
"\n",
"print(results)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Advanced Usage with Parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataWebScraperAPI\n",
"\n",
"# Initialize with default parameters\n",
"scraper_tool = BrightDataWebScraperAPI(bright_data_api_key=\"your-api-key\")\n",
"\n",
"# Extract Amazon product data with location-specific pricing\n",
"results = scraper_tool.invoke(\n",
" {\n",
" \"url\": \"https://www.amazon.com/dp/B08L5TNJHG\",\n",
" \"dataset_type\": \"amazon_product\",\n",
" \"zipcode\": \"10001\", # Get pricing for New York City\n",
" }\n",
")\n",
"\n",
"print(results)\n",
"\n",
"# Extract LinkedIn profile data\n",
"linkedin_results = scraper_tool.invoke(\n",
" {\n",
" \"url\": \"https://www.linkedin.com/in/satyanadella/\",\n",
" \"dataset_type\": \"linkedin_person_profile\",\n",
" }\n",
")\n",
"\n",
"print(linkedin_results)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Customization Options\n",
"\n",
"The BrightDataWebScraperAPI tool accepts several parameters for customization:\n",
"\n",
"|Parameter|Type|Description|\n",
"|:--|:--|:--|\n",
"|`url`|str|The URL to extract data from|\n",
"|`dataset_type`|str|Type of dataset to use (e.g., \"amazon_product\")|\n",
"|`zipcode`|str|Optional zipcode for location-specific data|\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Available Dataset Types\n",
"\n",
"The tool supports the following dataset types for structured data extraction:\n",
"\n",
"|Dataset Type|Description|\n",
"|:--|:--|\n",
"|`amazon_product`|Extract detailed Amazon product data|\n",
"|`amazon_product_reviews`|Extract Amazon product reviews|\n",
"|`linkedin_person_profile`|Extract LinkedIn person profile data|\n",
"|`linkedin_company_profile`|Extract LinkedIn company profile data|\n"
]
},
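{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sketch, the other dataset types follow the same `invoke` pattern; only `dataset_type` (and the target URL) changes. The example below requests product reviews for the same placeholder Amazon URL used earlier and is illustrative rather than a guaranteed output."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# Sketch: extract reviews for an Amazon product page (same invoke pattern, different dataset_type)\n",
"review_results = scraper_tool.invoke(\n",
"    {\n",
"        \"url\": \"https://www.amazon.com/dp/B08L5TNJHG\",\n",
"        \"dataset_type\": \"amazon_product_reviews\",\n",
"    }\n",
")\n",
"\n",
"print(review_results)"
]
},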
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use within an agent"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataWebScraperAPI\n",
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"# Initialize the LLM\n",
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\", google_api_key=\"your-api-key\")\n",
"\n",
"# Initialize the Bright Data Web Scraper API tool\n",
"scraper_tool = BrightDataWebScraperAPI(bright_data_api_key=\"your-api-key\")\n",
"\n",
"# Create the agent with the tool\n",
"agent = create_react_agent(llm, [scraper_tool])\n",
"\n",
"# Provide a user query\n",
"user_input = \"Scrape Amazon product data for https://www.amazon.com/dp/B0D2Q9397Y?th=1 in New York (zipcode 10001).\"\n",
"\n",
"# Stream the agent's step-by-step output\n",
"for step in agent.stream(\n",
" {\"messages\": user_input},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"- [Bright Data API Documentation](https://docs.brightdata.com/scraping-automation/web-scraper-api/overview)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,294 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "a6f91f20",
"metadata": {},
"source": [
"# BrightDataSERP\n",
"\n",
"[Bright Data](https://brightdata.com/) provides a powerful SERP API that allows you to query search engines (Google,Bing.DuckDuckGo,Yandex) with geo-targeting and advanced customization options, particularly useful for AI agents requiring real-time web information.\n",
"\n",
"\n",
"## Overview\n",
"\n",
"### Integration details\n",
"\n",
"\n",
"|Class|Package|Serializable|JS support|Package latest|\n",
"|:--|:--|:-:|:-:|:-:|\n",
"|[BrightDataSERP](https://pypi.org/project/langchain-brightdata/)|[langchain-brightdata](https://pypi.org/project/langchain-brightdata/)|✅|❌|![PyPI - Version](https://img.shields.io/pypi/v/langchain-brightdata?style=flat-square&label=%20)|\n",
"\n",
"\n",
"### Tool features\n",
"\n",
"\n",
"|Native async|Returns artifact|Return data|Pricing|\n",
"|:-:|:-:|:--|:-:|\n",
"|❌|❌|Title, URL, snippet, position, and other search result data|Requires Bright Data account|\n",
"\n",
"\n",
"\n",
"## Setup\n",
"\n",
"The integration lives in the `langchain-brightdata` package."
]
},
{
"cell_type": "raw",
"id": "f85b4089",
"metadata": {},
"source": [
"pip install langchain-brightdata"
]
},
{
"cell_type": "markdown",
"id": "b15e9266",
"metadata": {},
"source": [
"### Credentials\n",
"\n",
"You'll need a Bright Data API key to use this tool. You can set it as an environment variable:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e0b178a2-8816-40ca-b57c-ccdd86dde9c9",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"BRIGHT_DATA_API_KEY\"] = \"your-api-key\""
]
},
{
"cell_type": "markdown",
"id": "bc5ab717-fd27-4c59-b912-bdd099541478",
"metadata": {},
"source": [
"Or pass it directly when initializing the tool:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a6c2f136-6367-4f1f-825d-ae741e1bf281",
"metadata": {},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataSERP\n",
"\n",
"serp_tool = BrightDataSERP(bright_data_api_key=\"your-api-key\")"
]
},
{
"cell_type": "markdown",
"id": "eed8cfcc",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Here we show how to instantiate an instance of the BrightDataSERP tool. This tool allows you to perform search engine queries with various customization options including geo-targeting, language preferences, device type simulation, and specific search types using Bright Data's SERP API.\n",
"\n",
"The tool accepts various parameters during instantiation:\n",
"\n",
"- `bright_data_api_key` (required, str): Your Bright Data API key for authentication.\n",
"- `search_engine` (optional, str): Search engine to use for queries. Default is \"google\". Other options include \"bing\", \"yahoo\", \"yandex\", \"DuckDuckGo\" etc.\n",
"- `country` (optional, str): Two-letter country code for localized search results (e.g., \"us\", \"gb\", \"de\", \"jp\"). Default is \"us\".\n",
"- `language` (optional, str): Two-letter language code for the search results (e.g., \"en\", \"es\", \"fr\", \"de\"). Default is \"en\".\n",
"- `results_count` (optional, int): Number of search results to return. Default is 10. Maximum value is typically 100.\n",
"- `search_type` (optional, str): Type of search to perform. Options include:\n",
" - None (default): Regular web search\n",
" - \"isch\": Images search\n",
" - \"shop\": Shopping search\n",
" - \"nws\": News search\n",
" - \"jobs\": Jobs search\n",
"- `device_type` (optional, str): Device type to simulate for the search. Options include:\n",
" - None (default): Desktop device\n",
" - \"mobile\": Generic mobile device\n",
" - \"ios\": iOS device (iPhone)\n",
" - \"android\": Android device\n",
"- `parse_results` (optional, bool): Whether to return parsed JSON results. Default is False, which returns raw HTML response."
]
},
{
"cell_type": "markdown",
"id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f",
"metadata": {},
"source": [
"## Invocation"
]
},
{
"cell_type": "markdown",
"id": "902dc1fd",
"metadata": {},
"source": [
"### Basic Usage"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64",
"metadata": {},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataSERP\n",
"\n",
"# Initialize the tool\n",
"serp_tool = BrightDataSERP(\n",
" bright_data_api_key=\"your-api-key\" # Optional if set in environment variables\n",
")\n",
"\n",
"# Run a basic search\n",
"results = serp_tool.invoke(\"latest AI research papers\")\n",
"\n",
"print(results)"
]
},
{
"cell_type": "markdown",
"id": "74147a1a",
"metadata": {},
"source": [
"### Advanced Usage with Parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "65310a8b-eb0c-4d9e-a618-4f4abe2414fc",
"metadata": {},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataSERP\n",
"\n",
"# Initialize with default parameters\n",
"serp_tool = BrightDataSERP(\n",
" bright_data_api_key=\"your-api-key\",\n",
" search_engine=\"google\", # Default\n",
" country=\"us\", # Default\n",
" language=\"en\", # Default\n",
" results_count=10, # Default\n",
" parse_results=True, # Get structured JSON results\n",
")\n",
"\n",
"# Use with specific parameters for this search\n",
"results = serp_tool.invoke(\n",
" {\n",
" \"query\": \"best electric vehicles\",\n",
" \"country\": \"de\", # Get results as if searching from Germany\n",
" \"language\": \"de\", # Get results in German\n",
" \"search_type\": \"shop\", # Get shopping results\n",
" \"device_type\": \"mobile\", # Simulate a mobile device\n",
" \"results_count\": 15,\n",
" }\n",
")\n",
"\n",
"print(results)"
]
},
{
"cell_type": "markdown",
"id": "d6e73897",
"metadata": {},
"source": [
"## Customization Options\n",
"\n",
"The BrightDataSERP tool accepts several parameters for customization:\n",
"\n",
"|Parameter|Type|Description|\n",
"|:--|:--|:--|\n",
"|`query`|str|The search query to perform|\n",
"|`search_engine`|str|Search engine to use (default: \"google\")|\n",
"|`country`|str|Two-letter country code for localized results (default: \"us\")|\n",
"|`language`|str|Two-letter language code (default: \"en\")|\n",
"|`results_count`|int|Number of results to return (default: 10)|\n",
"|`search_type`|str|Type of search: None (web), \"isch\" (images), \"shop\", \"nws\" (news), \"jobs\"|\n",
"|`device_type`|str|Device type: None (desktop), \"mobile\", \"ios\", \"android\"|\n",
"|`parse_results`|bool|Whether to return structured JSON (default: False)|\n"
]
},
{
"cell_type": "markdown",
"id": "e3353ce6",
"metadata": {},
"source": [
"## Use within an agent"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c91c32f",
"metadata": {},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataSERP\n",
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"# Initialize the LLM\n",
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\", google_api_key=\"your-api-key\")\n",
"\n",
"# Initialize the Bright Data SERP tool\n",
"serp_tool = BrightDataSERP(\n",
" bright_data_api_key=\"your-api-key\",\n",
" search_engine=\"google\",\n",
" country=\"us\",\n",
" language=\"en\",\n",
" results_count=10,\n",
" parse_results=True,\n",
")\n",
"\n",
"# Create the agent\n",
"agent = create_react_agent(llm, [serp_tool])\n",
"\n",
"# Provide a user query\n",
"user_input = \"Search for 'best electric vehicles' shopping results in Germany in German using mobile.\"\n",
"\n",
"# Stream the agent's output step-by-step\n",
"for step in agent.stream(\n",
" {\"messages\": user_input},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "markdown",
"id": "e8dec55a",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"- [Bright Data API Documentation](https://docs.brightdata.com/scraping-automation/serp-api/introduction)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,314 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# BrightDataUnlocker"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[Bright Data](https://brightdata.com/) provides a powerful Web Unlocker API that allows you to access websites that might be protected by anti-bot measures, geo-restrictions, or other access limitations, making it particularly useful for AI agents requiring reliable web content extraction."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Overview"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Integration details"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"|Class|Package|Serializable|JS support|Package latest|\n",
"|:--|:--|:-:|:-:|:-:|\n",
"|[BrightDataUnlocker](https://pypi.org/project/langchain-brightdata/)|[langchain-brightdata](https://pypi.org/project/langchain-brightdata/)|✅|❌|![PyPI - Version](https://img.shields.io/pypi/v/langchain-brightdata?style=flat-square&label=%20)|\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Tool features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"|Native async|Returns artifact|Return data|Pricing|\n",
"|:-:|:-:|:--|:-:|\n",
"|❌|❌|HTML, Markdown, or screenshot of web pages|Requires Bright Data account|\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The integration lives in the `langchain-brightdata` package."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"pip install langchain-brightdata"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You'll need a Bright Data API key to use this tool. You can set it as an environment variable:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"BRIGHT_DATA_API_KEY\"] = \"your-api-key\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or pass it directly when initializing the tool:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataUnlocker\n",
"\n",
"unlocker_tool = BrightDataUnlocker(bright_data_api_key=\"your-api-key\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Here we show how to instantiate an instance of the BrightDataUnlocker tool. This tool allows you to access websites that may be protected by anti-bot measures, geo-restrictions, or other access limitations using Bright Data's Web Unlocker service.\n",
"\n",
"The tool accepts various parameters during instantiation:\n",
"\n",
"- `bright_data_api_key` (required, str): Your Bright Data API key for authentication.\n",
"- `format` (optional, Literal[\"raw\"]): Format of the response content. Default is \"raw\".\n",
"- `country` (optional, str): Two-letter country code for geo-specific access (e.g., \"us\", \"gb\", \"de\", \"jp\"). Set this when you need to view the website as if accessing from a specific country. Default is None.\n",
"- `zone` (optional, str): Bright Data zone to use for the request. The \"unlocker\" zone is optimized for accessing websites that might block regular requests. Default is \"unlocker\".\n",
"- `data_format` (optional, Literal[\"html\", \"markdown\", \"screenshot\"]): Output format for the retrieved content. Options include:\n",
" - \"html\" - Returns the standard HTML content (default)\n",
" - \"markdown\" - Returns content converted to markdown format\n",
" - \"screenshot\" - Returns a PNG screenshot of the rendered page"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Invocation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Basic Usage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataUnlocker\n",
"\n",
"# Initialize the tool\n",
"unlocker_tool = BrightDataUnlocker(\n",
" bright_data_api_key=\"your-api-key\" # Optional if set in environment variables\n",
")\n",
"\n",
"# Access a webpage\n",
"result = unlocker_tool.invoke(\"https://example.com\")\n",
"\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Advanced Usage with Parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataUnlocker\n",
"\n",
"unlocker_tool = BrightDataUnlocker(\n",
" bright_data_api_key=\"your-api-key\",\n",
")\n",
"\n",
"# Access a webpage with specific parameters\n",
"result = unlocker_tool.invoke(\n",
" {\n",
" \"url\": \"https://example.com/region-restricted-content\",\n",
" \"country\": \"gb\", # Access as if from Great Britain\n",
" \"data_format\": \"html\", # Get content in markdown format\n",
" \"zone\": \"unlocker\", # Use the unlocker zone\n",
" }\n",
")\n",
"\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Customization Options"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The BrightDataUnlocker tool accepts several parameters for customization:\n",
"\n",
"|Parameter|Type|Description|\n",
"|:--|:--|:--|\n",
"|`url`|str|The URL to access|\n",
"|`format`|str|Format of the response content (default: \"raw\")|\n",
"|`country`|str|Two-letter country code for geo-specific access (e.g., \"us\", \"gb\")|\n",
"|`zone`|str|Bright Data zone to use (default: \"unlocker\")|\n",
"|`data_format`|str|Output format: None (HTML), \"markdown\", or \"screenshot\"|\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data Format Options\n",
"\n",
"The `data_format` parameter allows you to specify how the content should be returned:\n",
"\n",
"- `None` or `\"html\"` (default): Returns the standard HTML content of the page\n",
"- `\"markdown\"`: Returns the content converted to markdown format, which is useful for feeding directly to LLMs\n",
"- `\"screenshot\"`: Returns a PNG screenshot of the rendered page, useful for visual analysis"
]
},
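{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a minimal sketch, the cell below reuses the `unlocker_tool` created above and only changes `data_format` to request the page as markdown, which is convenient to pass directly to an LLM."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# Request the page converted to markdown instead of raw HTML\n",
"markdown_result = unlocker_tool.invoke(\n",
"    {\n",
"        \"url\": \"https://example.com\",\n",
"        \"data_format\": \"markdown\",\n",
"    }\n",
")\n",
"\n",
"print(markdown_result)"
]
},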
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use within an agent"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from langchain_brightdata import BrightDataUnlocker\n",
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"# Initialize the LLM\n",
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\", google_api_key=\"your-api-key\")\n",
"\n",
"# Initialize the tool\n",
"bright_data_tool = BrightDataUnlocker(bright_data_api_key=\"your-api-key\")\n",
"\n",
"# Create the agent\n",
"agent = create_react_agent(llm, [bright_data_tool])\n",
"\n",
"# Input URLs or prompt\n",
"user_input = \"Get the content from https://example.com/region-restricted-page - access it from GB\"\n",
"\n",
"# Stream the agent's output step by step\n",
"for step in agent.stream(\n",
" {\"messages\": user_input},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"- [Bright Data API Documentation](https://docs.brightdata.com/scraping-automation/web-unlocker/introduction)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -16,7 +16,7 @@
"1. `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib`\n",
"\n",
"## Instructions for retrieving your Google Docs data\n",
"By default, the `GoogleDriveTools` and `GoogleDriveWrapper` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable using the `GOOGLE_ACCOUNT_FILE` environment variable. \n",
"By default, the `GoogleDriveTools` and `GoogleDriveWrapper` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable by setting the `GOOGLE_ACCOUNT_FILE` environment variable to your `custom/path/to/credentials.json`. \n",
"The location of `token.json` use the same directory (or use the parameter `token_path`). Note that `token.json` will be created automatically the first time you use the tool.\n",
"\n",
"`GoogleDriveSearchTool` can retrieve a selection of files with some requests. \n",
@@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -88,7 +88,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet unstructured"
"%pip install --upgrade --quiet unstructured langchain-googledrive"
]
},
{
@@ -99,9 +99,13 @@
},
"outputs": [],
"source": [
"import os\n",
"\n",
"from langchain_googledrive.tools.google_drive.tool import GoogleDriveSearchTool\n",
"from langchain_googledrive.utilities.google_drive import GoogleDriveAPIWrapper\n",
"\n",
"os.environ[\"GOOGLE_ACCOUNT_FILE\"] = \"custom/path/to/credentials.json\"\n",
"\n",
"# By default, search only in the filename.\n",
"tool = GoogleDriveSearchTool(\n",
" api_wrapper=GoogleDriveAPIWrapper(\n",
@@ -114,7 +118,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -134,33 +138,52 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"\"A wrapper around Google Drive Search. Useful for when you need to find a document in google drive. The input should be formatted as a list of entities separated with a space. As an example, a list of keywords is 'hello word'.\""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tool.description"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import load_tools\n",
"\n",
"tools = load_tools(\n",
" [\"google-drive-search\"],\n",
" folder_id=folder_id,\n",
" template=\"gdrive-query-in-folder\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use within an Agent"
"## Use the tool within a ReAct agent"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In order to create an agent that uses the Google Jobs tool install Langgraph"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langgraph langchain-openai"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"and use the `create_react_agent` functionality to initialize a ReAct agent. You will also need to set up your OPEN_API_KEY (visit https://platform.openai.com) in order to access OpenAI's chat models."
]
},
{
@@ -171,32 +194,29 @@
},
"outputs": [],
"source": [
"from langchain.agents import AgentType, initialize_agent\n",
"from langchain_openai import OpenAI\n",
"import os\n",
"\n",
"llm = OpenAI(temperature=0)\n",
"agent = initialize_agent(\n",
" tools=tools,\n",
" llm=llm,\n",
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent.run(\"Search in google drive, who is 'Yann LeCun' ?\")"
"from langchain.chat_models import init_chat_model\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"your-openai-api-key\"\n",
"\n",
"\n",
"llm = init_chat_model(\"gpt-4o-mini\", model_provider=\"openai\", temperature=0)\n",
"agent = create_react_agent(llm, tools=[tool])\n",
"\n",
"events = agent.stream(\n",
" {\"messages\": [(\"user\", \"Search in google drive, who is 'Yann LeCun' ?\")]},\n",
" stream_mode=\"values\",\n",
")\n",
"for event in events:\n",
" event[\"messages\"][-1].pretty_print()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "venv",
"language": "python",
"name": "python3"
},
@@ -210,7 +230,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.7"
}
},
"nbformat": 4,

View File

@@ -6,55 +6,76 @@
"source": [
"# Google Finance\n",
"\n",
"This notebook goes over how to use the Google Finance Tool to get information from the Google Finance page\n",
"This notebook goes over how to use the Google Finance Tool to get information from the Google Finance page.\n",
"\n",
"To get an SerpApi key key, sign up at: https://serpapi.com/users/sign_up.\n",
"\n",
"Then install google-search-results with the command: \n",
"\n",
"pip install google-search-results\n",
"\n",
"Then set the environment variable SERPAPI_API_KEY to your SerpApi key\n",
"\n",
"Or pass the key in as a argument to the wrapper serp_api_key=\"your secret key\""
"To use the tool with Langchain install following packages"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade --quiet google-search-results langchain-community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use the Tool"
"Then set the environment variable SERPAPI_API_KEY to your SerpApi key or pass the key in as a argument to the wrapper serp_api_key=\"your secret key\"."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet google-search-results langchain-community"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"SERPAPI_API_KEY\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.tools.google_finance import GoogleFinanceQueryRun\n",
"from langchain_community.utilities.google_finance import GoogleFinanceAPIWrapper\n",
"\n",
"os.environ[\"SERPAPI_API_KEY\"] = \"\"\n",
"tool = GoogleFinanceQueryRun(api_wrapper=GoogleFinanceAPIWrapper())"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'\\nQuery: Google\\nstock: GOOGL:NASDAQ\\nprice: $159.96\\npercentage: 0.94\\nmovement: Up\\nus: price = 42210.57, movement = Down\\neurope: price = 23638.56, movement = Up\\nasia: price = 38183.26, movement = Up\\n'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tool.run(\"Google\")"
]
@@ -63,7 +84,115 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Using it with Langchain"
"In order to create an agent that uses the Google Finance tool install Langgraph"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade --quiet langgraph langchain-openai"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"and use the `create_react_agent` functionality to initialize a ReAct agent. You will also need to set up your OPEN_API_KEY (visit https://platform.openai.com) in order to access OpenAI's chat models."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"SERP_API_KEY\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import init_chat_model\n",
"\n",
"llm = init_chat_model(\"gpt-4o-mini\", model_provider=\"openai\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.agent_toolkits.load_tools import load_tools\n",
"\n",
"tools = load_tools([\"google-scholar\", \"google-finance\"], llm=llm)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What is Google's stock?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" google_finance (call_8m0txCtxNuQaAv9UlomPhSA1)\n",
" Call ID: call_8m0txCtxNuQaAv9UlomPhSA1\n",
" Args:\n",
" query: Google\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: google_finance\n",
"\n",
"\n",
"Query: Google\n",
"stock: GOOGL:NASDAQ\n",
"price: $159.96\n",
"percentage: 0.94\n",
"movement: Up\n",
"us: price = 42210.57, movement = Down\n",
"europe: price = 23638.56, movement = Up\n",
"asia: price = 38183.26, movement = Up\n",
"\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"Google's stock, listed as GOOGL on NASDAQ, is currently priced at $159.96, with a movement up by 0.94%.\n"
]
}
],
"source": [
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"agent = create_react_agent(llm, tools)\n",
"\n",
"events = agent.stream(\n",
" {\"messages\": [(\"user\", \"What is Google's stock?\")]},\n",
" stream_mode=\"values\",\n",
")\n",
"for event in events:\n",
" event[\"messages\"][-1].pretty_print()"
]
},
{
@@ -71,26 +200,12 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
"from langchain_openai import OpenAI\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"SERP_API_KEY\"] = \"\"\n",
"llm = OpenAI()\n",
"tools = load_tools([\"google-scholar\", \"google-finance\"], llm=llm)\n",
"agent = initialize_agent(\n",
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
")\n",
"agent.run(\"what is google's stock\")"
]
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -104,9 +219,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View File

@@ -12,33 +12,49 @@
"\n",
"This Jupyter Notebook demonstrates how to use the `GraphQLAPIWrapper` component with an Agent.\n",
"\n",
"In this example, we'll be using the public `Star Wars GraphQL API` available at the following endpoint: https://swapi-graphql.netlify.app/.netlify/functions/index.\n",
"In this example, we'll be using the public `Star Wars GraphQL API` available at the following endpoint: https://swapi-graphql.netlify.app/graphql .\n",
"\n",
"First, you need to install `httpx` and `gql` Python packages."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install httpx gql > /dev/null"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade --quiet langchain-community"
]
@@ -56,21 +72,36 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
"from langchain_openai import OpenAI\n",
"import os\n",
"\n",
"llm = OpenAI(temperature=0)\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.agent_toolkits.load_tools import load_tools\n",
"\n",
"tools = load_tools(\n",
" [\"graphql\"],\n",
" graphql_endpoint=\"https://swapi-graphql.netlify.app/.netlify/functions/index\",\n",
")\n",
"\n",
"agent = initialize_agent(\n",
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
" graphql_endpoint=\"https://swapi-graphql.netlify.app/graphql\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -80,35 +111,55 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"Search for the titles of all the stawars films stored in the graphql database that has this schema allFilms {\n",
" films {\n",
" title\n",
" director\n",
" releaseDate\n",
" speciesConnection {\n",
" species {\n",
" name\n",
" classification\n",
" homeworld {\n",
" name\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to query the graphql database to get the titles of all the star wars films\n",
"Action: query_graphql\n",
"Action Input: query { allFilms { films { title } } }\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m\"{\\n \\\"allFilms\\\": {\\n \\\"films\\\": [\\n {\\n \\\"title\\\": \\\"A New Hope\\\"\\n },\\n {\\n \\\"title\\\": \\\"The Empire Strikes Back\\\"\\n },\\n {\\n \\\"title\\\": \\\"Return of the Jedi\\\"\\n },\\n {\\n \\\"title\\\": \\\"The Phantom Menace\\\"\\n },\\n {\\n \\\"title\\\": \\\"Attack of the Clones\\\"\\n },\\n {\\n \\\"title\\\": \\\"Revenge of the Sith\\\"\\n }\\n ]\\n }\\n}\"\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the titles of all the star wars films\n",
"Final Answer: The titles of all the star wars films are: A New Hope, The Empire Strikes Back, Return of the Jedi, The Phantom Menace, Attack of the Clones, and Revenge of the Sith.\u001b[0m\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" query_graphql (call_tN5A0dBbfOMewuw8Yy13bYpW)\n",
" Call ID: call_tN5A0dBbfOMewuw8Yy13bYpW\n",
" Args:\n",
" tool_input: query { allFilms { films { title } } }\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: query_graphql\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
"\"{\\n \\\"allFilms\\\": {\\n \\\"films\\\": [\\n {\\n \\\"title\\\": \\\"A New Hope\\\"\\n },\\n {\\n \\\"title\\\": \\\"The Empire Strikes Back\\\"\\n },\\n {\\n \\\"title\\\": \\\"Return of the Jedi\\\"\\n },\\n {\\n \\\"title\\\": \\\"The Phantom Menace\\\"\\n },\\n {\\n \\\"title\\\": \\\"Attack of the Clones\\\"\\n },\\n {\\n \\\"title\\\": \\\"Revenge of the Sith\\\"\\n }\\n ]\\n }\\n}\"\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"The titles of all the Star Wars films stored in the database are:\n",
"1. A New Hope\n",
"2. The Empire Strikes Back\n",
"3. Return of the Jedi\n",
"4. The Phantom Menace\n",
"5. Attack of the Clones\n",
"6. Revenge of the Sith\n",
"\n",
"If you would like more information about any of these films, please let me know!\n"
]
},
{
"data": {
"text/plain": [
"'The titles of all the star wars films are: A New Hope, The Empire Strikes Back, Return of the Jedi, The Phantom Menace, Attack of the Clones, and Revenge of the Sith.'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
@@ -133,9 +184,24 @@
"\n",
"suffix = \"Search for the titles of all the stawars films stored in the graphql database that has this schema \"\n",
"\n",
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": suffix + graphql_fields,\n",
"}\n",
"\n",
"agent.run(suffix + graphql_fields)"
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -157,7 +223,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.4"
}
},
"nbformat": 4,

View File

@@ -22,8 +22,26 @@
},
{
"cell_type": "code",
"execution_count": 9,
"id": "34bb5968",
"execution_count": 1,
"id": "8b81a74e-db10-4e8d-9f90-83219df30ab3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade --quiet pyowm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "78ab9fcd-bb7b-434b-9a38-0a9249e35768",
"metadata": {},
"outputs": [],
"source": [
@@ -38,8 +56,8 @@
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ac4910f8",
"execution_count": 3,
"id": "0a8aa4b0-6aea-4172-9546-361e127a4a02",
"metadata": {},
"outputs": [
{
@@ -47,17 +65,17 @@
"output_type": "stream",
"text": [
"In London,GB, the current weather is as follows:\n",
"Detailed status: broken clouds\n",
"Wind speed: 2.57 m/s, direction: 240°\n",
"Humidity: 55%\n",
"Detailed status: overcast clouds\n",
"Wind speed: 4.12 m/s, direction: 10°\n",
"Humidity: 51%\n",
"Temperature: \n",
" - Current: 20.12°C\n",
" - High: 21.75°C\n",
" - Low: 18.68°C\n",
" - Feels like: 19.62°C\n",
" - Current: 12.82°C\n",
" - High: 13.98°C\n",
" - Low: 12.01°C\n",
" - Feels like: 11.49°C\n",
"Rain: {}\n",
"Heat index: None\n",
"Cloud cover: 75%\n"
"Cloud cover: 100%\n"
]
}
],
@@ -76,76 +94,82 @@
},
{
"cell_type": "code",
"execution_count": 11,
"id": "b3367417",
"execution_count": 4,
"id": "402c832c-87c7-4088-b80f-ec1924a43796",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
"from langchain_openai import OpenAI\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"OPENWEATHERMAP_API_KEY\"] = \"\"\n",
"\n",
"llm = OpenAI(temperature=0)\n",
"\n",
"tools = load_tools([\"openweathermap-api\"], llm)\n",
"\n",
"agent_chain = initialize_agent(\n",
" tools=tools, llm=llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
")"
"tools = [weather.run]\n",
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "bf4f6854",
"execution_count": 5,
"id": "9b423a92-1568-4ee2-9c7d-3b9acf7756a1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What's the weather like in London?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" run (call_6vPq9neyy7oOnht29ExidE2g)\n",
" Call ID: call_6vPq9neyy7oOnht29ExidE2g\n",
" Args:\n",
" location: London\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: run\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to find out the current weather in London.\n",
"Action: OpenWeatherMap\n",
"Action Input: London,GB\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mIn London,GB, the current weather is as follows:\n",
"Detailed status: broken clouds\n",
"Wind speed: 2.57 m/s, direction: 240°\n",
"Humidity: 56%\n",
"In London, the current weather is as follows:\n",
"Detailed status: overcast clouds\n",
"Wind speed: 4.12 m/s, direction: 10°\n",
"Humidity: 51%\n",
"Temperature: \n",
" - Current: 20.11°C\n",
" - High: 21.75°C\n",
" - Low: 18.68°C\n",
" - Feels like: 19.64°C\n",
" - Current: 12.82°C\n",
" - High: 13.98°C\n",
" - Low: 12.01°C\n",
" - Feels like: 11.49°C\n",
"Rain: {}\n",
"Heat index: None\n",
"Cloud cover: 75%\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the current weather in London.\n",
"Final Answer: The current weather in London is broken clouds, with a wind speed of 2.57 m/s, direction 240°, humidity of 56%, temperature of 20.11°C, high of 21.75°C, low of 18.68°C, and a heat index of None.\u001b[0m\n",
"Cloud cover: 100%\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
"The weather in London is currently overcast with 100% cloud cover. The temperature is around 12.82°C, feeling like 11.49°C. The wind is blowing at 4.12 m/s from the direction of 10°. Humidity is at 51%. The high for the day is 13.98°C, and the low is 12.01°C.\n"
]
},
{
"data": {
"text/plain": [
"'The current weather in London is broken clouds, with a wind speed of 2.57 m/s, direction 240°, humidity of 56%, temperature of 20.11°C, high of 21.75°C, low of 18.68°C, and a heat index of None.'"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent_chain.run(\"What's the weather like in London?\")"
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"What's the weather like in London?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2af226a-9cca-468d-b07f-0a928ea61f48",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -164,7 +188,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.13.2"
}
},
"nbformat": 4,

View File

@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 1,
"id": "70871a99-ffee-47d7-8e02-82eb99971f28",
"metadata": {},
"outputs": [],
@@ -51,7 +51,7 @@
{
"data": {
"text/plain": [
"'Barack Hussein Obama II'"
"'Barack Obama Full name: Barack Hussein Obama II'"
]
},
"execution_count": 4,
@@ -73,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 5,
"id": "17a9b1ad-6e84-4949-8ebd-8c52f6b296e3",
"metadata": {},
"outputs": [],
@@ -83,48 +83,11 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "cf8970a5-00e1-46bd-ba53-6a974eebbc10",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m Yes.\n",
"Follow up: How old was Plato when he died?\u001b[0m\n",
"Intermediate answer: \u001b[36;1m\u001b[1;3meighty\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mFollow up: How old was Socrates when he died?\u001b[0m\n",
"Intermediate answer: \u001b[36;1m\u001b[1;3m| Socrates | \n",
"| -------- | \n",
"| Born | c. 470 BC Deme Alopece, Athens | \n",
"| Died | 399 BC (aged approximately 71) Athens | \n",
"| Cause of death | Execution by forced suicide by poisoning | \n",
"| Spouse(s) | Xanthippe, Myrto | \n",
"\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mFollow up: How old was Aristotle when he died?\u001b[0m\n",
"Intermediate answer: \u001b[36;1m\u001b[1;3m62 years\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mSo the final answer is: Plato\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'Plato'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"from langchain.agents import AgentType, initialize_agent\n",
"from langchain_community.utilities import SearchApiAPIWrapper\n",
"from langchain_core.tools import Tool\n",
"from langchain_openai import OpenAI\n",
@@ -133,16 +96,88 @@
"search = SearchApiAPIWrapper()\n",
"tools = [\n",
" Tool(\n",
" name=\"Intermediate Answer\",\n",
" name=\"intermediate_answer\",\n",
" func=search.run,\n",
" description=\"useful for when you need to ask with search\",\n",
" )\n",
"]\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4198dda8-b7a9-4ae9-bcb6-b95e2c7681b9",
"metadata": {},
"outputs": [],
"source": [
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"self_ask_with_search = initialize_agent(\n",
" tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True\n",
")\n",
"self_ask_with_search.run(\"Who lived longer: Plato, Socrates, or Aristotle?\")"
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c24ad140-d41f-4e99-a42f-11371c3897b5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"Who lived longer: Plato, Socrates, or Aristotle?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" intermediate_answer (call_Q0JquDV3SWfnn3rkwJkJaffG)\n",
" Call ID: call_Q0JquDV3SWfnn3rkwJkJaffG\n",
" Args:\n",
" __arg1: Lifespan of Plato\n",
" intermediate_answer (call_j9rXzVlrCcGc8HOFnKUH6j5E)\n",
" Call ID: call_j9rXzVlrCcGc8HOFnKUH6j5E\n",
" Args:\n",
" __arg1: Lifespan of Socrates\n",
" intermediate_answer (call_IBQT2qn5PzDE6q0ZyfPdhRaX)\n",
" Call ID: call_IBQT2qn5PzDE6q0ZyfPdhRaX\n",
" Args:\n",
" __arg1: Lifespan of Aristotle\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: intermediate_answer\n",
"\n",
"384322 BC was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, ...\n",
"The Greek philosopher Aristotle (384-322 B.C.) made significant and lasting contributions to nearly every aspect of human knowledge, ...\n",
"Aristotle's lifespan (384 - 322) (jan 1, 384 BC jan 1, 322 BC). Added to timeline: Political Philosophy timeline. ByEdoardo. 25 Aug 2020.\n",
"Aristotle was one of the greatest philosophers and scientists the world has ever seen. He was born in 384 bc at Stagirus, a Greek seaport on the coast of Thrace ...\n",
"393c. 370 bce), king of Macedonia and grandfather of Alexander the Great (reigned 336323 bce). After his father's death in 367, Aristotle ...\n",
"It is difficult to rule out that possibility decisively, since little is known about the period of Aristotle's life from 341335. He evidently ...\n",
"Lifespan: c. 384 B.C. to 322 B.C.; Contributions: Considered one of the greatest thinkers in various fields including politics, psychology, and ...\n",
"Aristotle (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]) lived 384322 BC.\n",
"Aristotle (384 B.C.E.—322 B.C.E.). Aristotle is a towering figure in ancient Greek philosophy, who made important contributions to logic, criticism, ...\n",
"Aristotle. Born: 384 BC in Stagirus, Macedonia, Greece Died: 322 BC in Chalcis, Euboea, Greece. Aristotle was not primarily a mathematician but made ...\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"Based on the information:\n",
"\n",
"- Plato reportedly lived to be around eighty or eighty-one years old.\n",
"- Socrates' exact lifespan is not directly stated here, but he is known historically to have lived approximately from 470 BC to 399 BC, making him around 71 years old.\n",
"- Aristotle lived from 384 BC to 322 BC, which means he was about 62 years old.\n",
"\n",
"Therefore, Plato lived longer than both Socrates and Aristotle.\n"
]
}
],
"source": [
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"Who lived longer: Plato, Socrates, or Aristotle?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
@@ -157,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "6d0b4411-780a-4dcf-91b6-f3544e31e532",
"metadata": {},
"outputs": [],
@@ -167,17 +202,17 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "34e79449-6b33-4b45-9306-7e3dab1b8599",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Azure AI Engineer Be an XpanderCandidatar-meCandidatar-meCandidatar-me\\n\\nShare:\\n\\nAzure AI Engineer\\n\\nA área Digital Xperience da Xpand IT é uma equipa tecnológica de rápido crescimento que se concentra em tecnologias Microsoft e Mobile. A sua principal missão é fornecer soluções de software de alta qualidade que atendam às necessidades do utilizador final, num mundo tecnológico continuamente exigente e em ritmo acelerado, proporcionando a melhor experiência em termos de personalização, performance'"
"'No good search result found'"
]
},
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -196,7 +231,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "b16b7cd9-f0fe-4030-a36b-bbb52b19da18",
"metadata": {},
"outputs": [],
@@ -206,7 +241,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "e8adb325-2ad0-4a39-9bc2-d220ec3a29be",
"metadata": {},
"outputs": [
@@ -214,22 +249,22 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'search_metadata': {'id': 'search_qVdXG2jzvrlqTzayeYoaOb8A',\n",
"{'search_metadata': {'id': 'search_6Lpb2Z8vDqdsPRbrGkVgQzRy',\n",
" 'status': 'Success',\n",
" 'created_at': '2023-09-25T15:22:30Z',\n",
" 'request_time_taken': 3.21,\n",
" 'parsing_time_taken': 0.03,\n",
" 'total_time_taken': 3.24,\n",
" 'created_at': '2025-05-11T03:39:28Z',\n",
" 'request_time_taken': 0.86,\n",
" 'parsing_time_taken': 0.01,\n",
" 'total_time_taken': 0.87,\n",
" 'request_url': 'https://scholar.google.com/scholar?q=Large+Language+Models&hl=en',\n",
" 'html_url': 'https://www.searchapi.io/api/v1/searches/search_qVdXG2jzvrlqTzayeYoaOb8A.html',\n",
" 'json_url': 'https://www.searchapi.io/api/v1/searches/search_qVdXG2jzvrlqTzayeYoaOb8A'},\n",
" 'html_url': 'https://www.searchapi.io/api/v1/searches/search_6Lpb2Z8vDqdsPRbrGkVgQzRy.html',\n",
" 'json_url': 'https://www.searchapi.io/api/v1/searches/search_6Lpb2Z8vDqdsPRbrGkVgQzRy'},\n",
" 'search_parameters': {'engine': 'google_scholar',\n",
" 'q': 'Large Language Models',\n",
" 'hl': 'en'},\n",
" 'search_information': {'query_displayed': 'Large Language Models',\n",
" 'total_results': 6420000,\n",
" 'total_results': 6390000,\n",
" 'page': 1,\n",
" 'time_taken_displayed': 0.06},\n",
" 'time_taken_displayed': 0.08},\n",
" 'organic_results': [{'position': 1,\n",
" 'title': 'ChatGPT for good? On opportunities and '\n",
" 'challenges of large language models for '\n",
@@ -245,15 +280,15 @@
" 'we argue that large language models in '\n",
" 'education require …',\n",
" 'inline_links': {'cited_by': {'cites_id': '8166055256995715258',\n",
" 'total': 410,\n",
" 'link': 'https://scholar.google.com/scholar?cites=8166055256995715258&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'total': 4675,\n",
" 'link': 'https://scholar.google.com/scholar?cites=8166055256995715258&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '8166055256995715258',\n",
" 'total': 10,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=8166055256995715258&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:uthwmf2nU3EJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'edarxiv.org',\n",
" 'total': 16,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=8166055256995715258&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:uthwmf2nU3EJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'osf.io',\n",
" 'format': 'PDF',\n",
" 'link': 'https://edarxiv.org/5er8f/download?format=pdf'},\n",
" 'link': 'https://osf.io/preprints/edarxiv/5er8f/download'},\n",
" 'authors': [{'name': 'E Kasneci',\n",
" 'id': 'bZVkVvoAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=bZVkVvoAAAAJ&hl=en&oi=sra'},\n",
@@ -267,6 +302,82 @@
" 'id': 'TjfQ8QkAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=TjfQ8QkAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 2,\n",
" 'title': 'A survey on evaluation of large language '\n",
" 'models',\n",
" 'data_cid': 'o93zfHYlUTIJ',\n",
" 'link': 'https://dl.acm.org/doi/abs/10.1145/3641289',\n",
" 'publication': 'Y Chang, X Wang, J Wang, Y Wu, L Yang… - '\n",
" 'ACM transactions on …, 2024 - dl.acm.org',\n",
" 'snippet': '… 3.1 Natural Language Processing Tasks … '\n",
" 'the development of language models, '\n",
" 'particularly large language models, was to '\n",
" 'enhance performance on natural language '\n",
" 'processing tasks, …',\n",
" 'inline_links': {'cited_by': {'cites_id': '3625720365842685347',\n",
" 'total': 2864,\n",
" 'link': 'https://scholar.google.com/scholar?cites=3625720365842685347&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '3625720365842685347',\n",
" 'total': 8,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=3625720365842685347&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:o93zfHYlUTIJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'acm.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://dl.acm.org/doi/pdf/10.1145/3641289'},\n",
" 'authors': [{'name': 'Y Chang',\n",
" 'id': 'Hw-lrpAAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Hw-lrpAAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'X Wang',\n",
" 'id': 'Q7Ieos8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Q7Ieos8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Wang',\n",
" 'id': 'hBZ_tKsAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=hBZ_tKsAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Y Wu',\n",
" 'id': 'KVeRu2QAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=KVeRu2QAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'L Yang',\n",
" 'id': 'go3sFxcAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=go3sFxcAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 3,\n",
" 'title': 'A comprehensive overview of large language '\n",
" 'models',\n",
" 'data_cid': 'UDLkJGuOVl4J',\n",
" 'link': 'https://arxiv.org/abs/2307.06435',\n",
" 'publication': 'H Naveed, AU Khan, S Qiu, M Saqib, S '\n",
" 'Anwar… - arXiv preprint arXiv …, 2023 - '\n",
" 'arxiv.org',\n",
" 'snippet': '… Large Language Models (LLMs) have recently '\n",
" 'demonstrated remarkable capabilities in '\n",
" 'natural language processing tasks and '\n",
" 'beyond. This success of LLMs has led to a '\n",
" 'large influx of …',\n",
" 'inline_links': {'cited_by': {'cites_id': '6797777278393922128',\n",
" 'total': 990,\n",
" 'link': 'https://scholar.google.com/scholar?cites=6797777278393922128&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '6797777278393922128',\n",
" 'total': 4,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=6797777278393922128&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:UDLkJGuOVl4J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:UDLkJGuOVl4J:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2307.06435'},\n",
" 'authors': [{'name': 'H Naveed',\n",
" 'id': 'k5dpooQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=k5dpooQAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'AU Khan',\n",
" 'id': 'sbOhz2UAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=sbOhz2UAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'S Qiu',\n",
" 'id': 'OPNVthUAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=OPNVthUAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'M Saqib',\n",
" 'id': 'KvbLR3gAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=KvbLR3gAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'S Anwar',\n",
" 'id': 'vPJIHywAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=vPJIHywAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 4,\n",
" 'title': 'Large language models in medicine',\n",
" 'data_cid': 'Ph9AwHTmhzAJ',\n",
" 'link': 'https://www.nature.com/articles/s41591-023-02448-8',\n",
@@ -279,11 +390,15 @@
" '(LLaMA) as its backend model 30 . Finally, '\n",
" 'cheap imitations of …',\n",
" 'inline_links': {'cited_by': {'cites_id': '3497017024792502078',\n",
" 'total': 25,\n",
" 'link': 'https://scholar.google.com/scholar?cites=3497017024792502078&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'total': 2474,\n",
" 'link': 'https://scholar.google.com/scholar?cites=3497017024792502078&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '3497017024792502078',\n",
" 'total': 3,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=3497017024792502078&hl=en&as_sdt=0,33'}},\n",
" 'total': 7,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=3497017024792502078&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:Ph9AwHTmhzAJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'google.com',\n",
" 'format': 'PDF',\n",
" 'link': 'https://drive.google.com/file/d/1FKEGsSZ9GYOeToeKpxB4m3atGRbC-TSm/view'},\n",
" 'authors': [{'name': 'AJ Thirunavukarasu',\n",
" 'id': '3qb1AYwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=3qb1AYwAAAAJ&hl=en&oi=sra'},\n",
@@ -293,43 +408,132 @@
" {'name': 'K Elangovan',\n",
" 'id': 'BE_lVTQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=BE_lVTQAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 3,\n",
" 'title': 'Extracting training data from large language '\n",
" 'models',\n",
" 'data_cid': 'mEYsWK6bWKoJ',\n",
" 'link': 'https://www.usenix.org/conference/usenixsecurity21/presentation/carlini-extracting',\n",
" 'publication': 'N Carlini, F Tramer, E Wallace, M '\n",
" 'Jagielski… - 30th USENIX Security …, '\n",
" '2021 - usenix.org',\n",
" 'snippet': '… language model trained on scrapes of the '\n",
" 'public Internet, and are able to extract '\n",
" 'hundreds of verbatim text sequences from the '\n",
" 'model… models are more vulnerable than '\n",
" 'smaller models. …',\n",
" 'inline_links': {'cited_by': {'cites_id': '12274731957504198296',\n",
" 'total': 742,\n",
" 'link': 'https://scholar.google.com/scholar?cites=12274731957504198296&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '12274731957504198296',\n",
" 'total': 8,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=12274731957504198296&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:mEYsWK6bWKoJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:mEYsWK6bWKoJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'usenix.org',\n",
" {'position': 5,\n",
" 'title': 'A watermark for large language models',\n",
" 'data_cid': 'BlSyLHT4iiEJ',\n",
" 'link': 'https://proceedings.mlr.press/v202/kirchenbauer23a.html',\n",
" 'publication': 'J Kirchenbauer, J Geiping, Y Wen… - '\n",
" 'International …, 2023 - '\n",
" 'proceedings.mlr.press',\n",
" 'snippet': '… We propose a watermarking framework for '\n",
" 'proprietary language models. The … in the '\n",
" 'language model just before it produces a '\n",
" 'probability vector. The last layer of the '\n",
" 'language model …',\n",
" 'inline_links': {'cited_by': {'cites_id': '2417017327887471622',\n",
" 'total': 774,\n",
" 'link': 'https://scholar.google.com/scholar?cites=2417017327887471622&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '2417017327887471622',\n",
" 'total': 13,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=2417017327887471622&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:BlSyLHT4iiEJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:BlSyLHT4iiEJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'mlr.press',\n",
" 'format': 'PDF',\n",
" 'link': 'https://www.usenix.org/system/files/sec21-carlini-extracting.pdf'},\n",
" 'authors': [{'name': 'N Carlini',\n",
" 'id': 'q4qDvAoAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=q4qDvAoAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'F Tramer',\n",
" 'id': 'ijH0-a8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=ijH0-a8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'E Wallace',\n",
" 'id': 'SgST3LkAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=SgST3LkAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'M Jagielski',\n",
" 'id': '_8rw_GMAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=_8rw_GMAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 4,\n",
" 'link': 'https://proceedings.mlr.press/v202/kirchenbauer23a/kirchenbauer23a.pdf'},\n",
" 'authors': [{'name': 'J Kirchenbauer',\n",
" 'id': '48GJrbsAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=48GJrbsAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Geiping',\n",
" 'id': '206vNCEAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=206vNCEAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Y Wen',\n",
" 'id': 'oUYfjg0AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=oUYfjg0AAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 6,\n",
" 'title': 'Welcome to the era of chatgpt et al. the '\n",
" 'prospects of large language models',\n",
" 'data_cid': '3UrgC1BmpV8J',\n",
" 'link': 'https://link.springer.com/article/10.1007/s12599-023-00795-x',\n",
" 'publication': 'T Teubner, CM Flath, C Weinhardt… - '\n",
" 'Business & Information …, 2023 - '\n",
" 'Springer',\n",
" 'snippet': 'The emergence of Large Language Models '\n",
" '(LLMs) in combination with easy-to-use '\n",
" 'interfaces such as ChatGPT, Bing Chat, and '\n",
" 'Googles Bard represent both a Herculean '\n",
" 'task and a …',\n",
" 'inline_links': {'cited_by': {'cites_id': '6892027298743077597',\n",
" 'total': 409,\n",
" 'link': 'https://scholar.google.com/scholar?cites=6892027298743077597&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '6892027298743077597',\n",
" 'total': 16,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=6892027298743077597&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:3UrgC1BmpV8J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'springer.com',\n",
" 'format': 'PDF',\n",
" 'link': 'https://link.springer.com/content/pdf/10.1007/s12599-023-00795-x.pdf'},\n",
" 'authors': [{'name': 'T Teubner',\n",
" 'id': 'ZeCM1k8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=ZeCM1k8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'CM Flath',\n",
" 'id': '5Iy85HsAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=5Iy85HsAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'C Weinhardt',\n",
" 'id': 'lhfZxjAAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=lhfZxjAAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 7,\n",
" 'title': 'Talking about large language models',\n",
" 'data_cid': '3eYYI745r_0J',\n",
" 'link': 'https://dl.acm.org/doi/abs/10.1145/3624724',\n",
" 'publication': 'M Shanahan - Communications of the ACM, '\n",
" '2024 - dl.acm.org',\n",
" 'snippet': '… Recently, it has become commonplace to use '\n",
" 'the term “large language model” both for the '\n",
" 'generative models themselves and for the '\n",
" 'systems in which they are embedded, '\n",
" 'especially in …',\n",
" 'inline_links': {'cited_by': {'cites_id': '18279892901315536605',\n",
" 'total': 477,\n",
" 'link': 'https://scholar.google.com/scholar?cites=18279892901315536605&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '18279892901315536605',\n",
" 'total': 4,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=18279892901315536605&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:3eYYI745r_0J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'acm.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://dl.acm.org/doi/pdf/10.1145/3624724'},\n",
" 'authors': [{'name': 'M Shanahan',\n",
" 'id': '00bnGpAAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=00bnGpAAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 8,\n",
" 'title': 'Explainability for large language models: A '\n",
" 'survey',\n",
" 'data_cid': '0AqRKEINMw4J',\n",
" 'link': 'https://dl.acm.org/doi/abs/10.1145/3639372',\n",
" 'publication': 'H Zhao, H Chen, F Yang, N Liu, H Deng, H '\n",
" 'Cai… - ACM Transactions on …, 2024 - '\n",
" 'dl.acm.org',\n",
" 'snippet': '… Let us consider a scenario where we have a '\n",
" 'language model and we input a specific text '\n",
" 'into the model. The model then produces a '\n",
" 'classification output, such as sentiment …',\n",
" 'inline_links': {'cited_by': {'cites_id': '1023176118142831312',\n",
" 'total': 576,\n",
" 'link': 'https://scholar.google.com/scholar?cites=1023176118142831312&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '1023176118142831312',\n",
" 'total': 7,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=1023176118142831312&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:0AqRKEINMw4J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'acm.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://dl.acm.org/doi/pdf/10.1145/3639372'},\n",
" 'authors': [{'name': 'H Zhao',\n",
" 'id': '9FobigIAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=9FobigIAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'H Chen',\n",
" 'id': 'DyYOgLwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=DyYOgLwAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'F Yang',\n",
" 'id': 'RXFeW-8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=RXFeW-8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'N Liu',\n",
" 'id': 'Nir-EDYAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Nir-EDYAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'H Cai',\n",
" 'id': 'Kz-r34UAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Kz-r34UAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 9,\n",
" 'title': 'Emergent abilities of large language models',\n",
" 'data_cid': 'hG0iVOrOguoJ',\n",
" 'link': 'https://arxiv.org/abs/2206.07682',\n",
@@ -341,16 +545,16 @@
" 'efficiency on a wide range of downstream '\n",
" 'tasks. This paper instead discusses an …',\n",
" 'inline_links': {'cited_by': {'cites_id': '16898296257676733828',\n",
" 'total': 621,\n",
" 'link': 'https://scholar.google.com/scholar?cites=16898296257676733828&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'total': 3436,\n",
" 'link': 'https://scholar.google.com/scholar?cites=16898296257676733828&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '16898296257676733828',\n",
" 'total': 12,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=16898296257676733828&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:hG0iVOrOguoJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:hG0iVOrOguoJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'total': 11,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=16898296257676733828&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:hG0iVOrOguoJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:hG0iVOrOguoJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2206.07682.pdf?trk=cndc-detail'},\n",
" 'link': 'https://arxiv.org/pdf/2206.07682'},\n",
" 'authors': [{'name': 'J Wei',\n",
" 'id': 'wA5TK_0AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=wA5TK_0AAAAJ&hl=en&oi=sra'},\n",
@@ -362,232 +566,78 @@
" 'link': 'https://scholar.google.com/citations?user=WMBXw1EAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'C Raffel',\n",
" 'id': 'I66ZBYwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=I66ZBYwAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'B Zoph',\n",
" 'id': 'NL_7iTwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=NL_7iTwAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 5,\n",
" 'title': 'A survey on evaluation of large language '\n",
" 'models',\n",
" 'data_cid': 'ZYohnzOz-XgJ',\n",
" 'link': 'https://arxiv.org/abs/2307.03109',\n",
" 'publication': 'Y Chang, X Wang, J Wang, Y Wu, K Zhu… - '\n",
" 'arXiv preprint arXiv …, 2023 - arxiv.org',\n",
" 'snippet': '… 3.1 Natural Language Processing Tasks … '\n",
" 'the development of language models, '\n",
" 'particularly large language models, was to '\n",
" 'enhance performance on natural language '\n",
" 'processing tasks, …',\n",
" 'inline_links': {'cited_by': {'cites_id': '8717195588046785125',\n",
" 'total': 31,\n",
" 'link': 'https://scholar.google.com/scholar?cites=8717195588046785125&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '8717195588046785125',\n",
" 'total': 3,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=8717195588046785125&hl=en&as_sdt=0,33'},\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:ZYohnzOz-XgJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2307.03109'},\n",
" 'authors': [{'name': 'X Wang',\n",
" 'id': 'Q7Ieos8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Q7Ieos8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Wang',\n",
" 'id': 'YomxTXQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=YomxTXQAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Y Wu',\n",
" 'id': 'KVeRu2QAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=KVeRu2QAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'K Zhu',\n",
" 'id': 'g75dFLYAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=g75dFLYAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 6,\n",
" 'title': 'Evaluating large language models trained on '\n",
" 'code',\n",
" 'data_cid': '3tNvW3l5nU4J',\n",
" 'link': 'https://arxiv.org/abs/2107.03374',\n",
" 'publication': 'M Chen, J Tworek, H Jun, Q Yuan, HPO '\n",
" 'Pinto… - arXiv preprint arXiv …, 2021 - '\n",
" 'arxiv.org',\n",
" 'snippet': '… We introduce Codex, a GPT language model '\n",
" 'finetuned on publicly available code from '\n",
" 'GitHub, and study its Python code-writing '\n",
" 'capabilities. A distinct production version '\n",
" 'of Codex …',\n",
" 'inline_links': {'cited_by': {'cites_id': '5664817468434011102',\n",
" 'total': 941,\n",
" 'link': 'https://scholar.google.com/scholar?cites=5664817468434011102&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '5664817468434011102',\n",
" 'total': 2,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=5664817468434011102&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:3tNvW3l5nU4J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:3tNvW3l5nU4J:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2107.03374.pdf?trk=public_post_comment-text'},\n",
" 'authors': [{'name': 'M Chen',\n",
" 'id': '5fU-QMwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=5fU-QMwAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Tworek',\n",
" 'id': 'ZPuESCQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=ZPuESCQAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Q Yuan',\n",
" 'id': 'B059m2EAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=B059m2EAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 7,\n",
" 'title': 'Large language models in machine translation',\n",
" 'data_cid': 'sY5m_Y3-0Y4J',\n",
" 'link': 'http://research.google/pubs/pub33278.pdf',\n",
" 'publication': 'T Brants, AC Popat, P Xu, FJ Och, J Dean '\n",
" '- 2007 - research.google',\n",
" 'snippet': '… the benefits of largescale statistical '\n",
" 'language modeling in ma… trillion tokens, '\n",
" 'resulting in language models having up to '\n",
" '300 … is inexpensive to train on large data '\n",
" 'sets and approaches the …',\n",
" 'type': 'PDF',\n",
" 'inline_links': {'cited_by': {'cites_id': '10291286509313494705',\n",
" 'total': 737,\n",
" 'link': 'https://scholar.google.com/scholar?cites=10291286509313494705&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '10291286509313494705',\n",
" 'total': 31,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=10291286509313494705&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:sY5m_Y3-0Y4J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:sY5m_Y3-0Y4J:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'research.google',\n",
" 'format': 'PDF',\n",
" 'link': 'http://research.google/pubs/pub33278.pdf'},\n",
" 'authors': [{'name': 'FJ Och',\n",
" 'id': 'ITGdg6oAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=ITGdg6oAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Dean',\n",
" 'id': 'NMS69lQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=NMS69lQAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 8,\n",
" 'title': 'A watermark for large language models',\n",
" 'data_cid': 'BlSyLHT4iiEJ',\n",
" 'link': 'https://arxiv.org/abs/2301.10226',\n",
" 'publication': 'J Kirchenbauer, J Geiping, Y Wen, J '\n",
" 'Katz… - arXiv preprint arXiv …, 2023 - '\n",
" 'arxiv.org',\n",
" 'snippet': '… To derive this watermark, we examine what '\n",
" 'happens in the language model just before it '\n",
" 'produces a probability vector. The last '\n",
" 'layer of the language model outputs a vector '\n",
" 'of logits l(t). …',\n",
" 'inline_links': {'cited_by': {'cites_id': '2417017327887471622',\n",
" 'total': 104,\n",
" 'link': 'https://scholar.google.com/scholar?cites=2417017327887471622&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '2417017327887471622',\n",
" 'total': 4,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=2417017327887471622&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:BlSyLHT4iiEJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:BlSyLHT4iiEJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2301.10226.pdf?curius=1419'},\n",
" 'authors': [{'name': 'J Kirchenbauer',\n",
" 'id': '48GJrbsAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=48GJrbsAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Geiping',\n",
" 'id': '206vNCEAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=206vNCEAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Y Wen',\n",
" 'id': 'oUYfjg0AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=oUYfjg0AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Katz',\n",
" 'id': 'yPw4WjoAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=yPw4WjoAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 9,\n",
" 'title': 'ChatGPT and other large language models are '\n",
" 'double-edged swords',\n",
" 'data_cid': 'So0q8TRvxhYJ',\n",
" 'link': 'https://pubs.rsna.org/doi/full/10.1148/radiol.230163',\n",
" 'publication': 'Y Shen, L Heacock, J Elias, KD Hentel, B '\n",
" 'Reig, G Shih… - Radiology, 2023 - '\n",
" 'pubs.rsna.org',\n",
" 'snippet': '… Large Language Models (LLMs) are deep '\n",
" 'learning models trained to understand and '\n",
" 'generate natural language. Recent studies '\n",
" 'demonstrated that LLMs achieve great success '\n",
" 'in a …',\n",
" 'inline_links': {'cited_by': {'cites_id': '1641121387398204746',\n",
" 'total': 231,\n",
" 'link': 'https://scholar.google.com/scholar?cites=1641121387398204746&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '1641121387398204746',\n",
" 'total': 3,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=1641121387398204746&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:So0q8TRvxhYJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'authors': [{'name': 'Y Shen',\n",
" 'id': 'XaeN2zgAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=XaeN2zgAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'L Heacock',\n",
" 'id': 'tYYM5IkAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=tYYM5IkAAAAJ&hl=en&oi=sra'}]},\n",
" 'link': 'https://scholar.google.com/citations?user=I66ZBYwAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 10,\n",
" 'title': 'Pythia: A suite for analyzing large language '\n",
" 'models across training and scaling',\n",
" 'data_cid': 'aaIDvsMAD8QJ',\n",
" 'link': 'https://proceedings.mlr.press/v202/biderman23a.html',\n",
" 'publication': 'S Biderman, H Schoelkopf… - '\n",
" 'International …, 2023 - '\n",
" 'proceedings.mlr.press',\n",
" 'snippet': '… large language models, we prioritize '\n",
" 'consistency in model … out the most '\n",
" 'performance from each model. For example, we '\n",
" '… models, as it is becoming widely used for '\n",
" 'the largest models, …',\n",
" 'inline_links': {'cited_by': {'cites_id': '14127511396791067241',\n",
" 'total': 89,\n",
" 'link': 'https://scholar.google.com/scholar?cites=14127511396791067241&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '14127511396791067241',\n",
" 'total': 3,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=14127511396791067241&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:aaIDvsMAD8QJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:aaIDvsMAD8QJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'mlr.press',\n",
" 'title': 'A systematic evaluation of large language '\n",
" 'models of code',\n",
" 'data_cid': '-iQSW0h72hYJ',\n",
" 'link': 'https://dl.acm.org/doi/abs/10.1145/3520312.3534862',\n",
" 'publication': 'FF Xu, U Alon, G Neubig, VJ Hellendoorn '\n",
" '- Proceedings of the 6th ACM …, 2022 - '\n",
" 'dl.acm.org',\n",
" 'snippet': '… largest language models for code. We also '\n",
" 'release PolyCoder, a large open-source '\n",
" 'language model for code, trained exclusively '\n",
" 'on code in 12 different programming '\n",
" 'languages. In the …',\n",
" 'inline_links': {'cited_by': {'cites_id': '1646764164453115130',\n",
" 'total': 764,\n",
" 'link': 'https://scholar.google.com/scholar?cites=1646764164453115130&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '1646764164453115130',\n",
" 'total': 6,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=1646764164453115130&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:-iQSW0h72hYJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'acm.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://proceedings.mlr.press/v202/biderman23a/biderman23a.pdf'},\n",
" 'authors': [{'name': 'S Biderman',\n",
" 'id': 'bO7H0DAAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=bO7H0DAAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'H Schoelkopf',\n",
" 'id': 'XLahYIYAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=XLahYIYAAAAJ&hl=en&oi=sra'}]}],\n",
" 'related_searches': [{'query': 'large language models machine',\n",
" 'highlighted': ['machine'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=1&q=large+language+models+machine&qst=ib'},\n",
" {'query': 'large language models pruning',\n",
" 'highlighted': ['pruning'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=2&q=large+language+models+pruning&qst=ib'},\n",
" {'query': 'large language models multitask learners',\n",
" 'highlighted': ['multitask learners'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=3&q=large+language+models+multitask+learners&qst=ib'},\n",
" {'query': 'large language models speech recognition',\n",
" 'highlighted': ['speech recognition'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=4&q=large+language+models+speech+recognition&qst=ib'},\n",
" 'link': 'https://dl.acm.org/doi/pdf/10.1145/3520312.3534862'},\n",
" 'authors': [{'name': 'FF Xu',\n",
" 'id': '1hXyfIkAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=1hXyfIkAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'U Alon',\n",
" 'id': 'QBn7vq8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=QBn7vq8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'G Neubig',\n",
" 'id': 'wlosgkoAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=wlosgkoAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'VJ Hellendoorn',\n",
" 'id': 'PfYrc5kAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=PfYrc5kAAAAJ&hl=en&oi=sra'}]}],\n",
" 'related_searches': [{'query': 'emergent large language models',\n",
" 'highlighted': ['emergent'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=1&q=emergent+large+language+models&qst=ib'},\n",
" {'query': 'large language models abilities',\n",
" 'highlighted': ['abilities'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=2&q=large+language+models+abilities&qst=ib'},\n",
" {'query': 'prompt large language models',\n",
" 'highlighted': ['prompt'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=3&q=prompt+large+language+models&qst=ib'},\n",
" {'query': 'large language models training '\n",
" 'compute-optimal',\n",
" 'highlighted': ['training compute-optimal'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=4&q=large+language+models+training+compute-optimal&qst=ib'},\n",
" {'query': 'large language models machine translation',\n",
" 'highlighted': ['machine translation'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=5&q=large+language+models+machine+translation&qst=ib'},\n",
" {'query': 'emergent abilities of large language models',\n",
" 'highlighted': ['emergent abilities of'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=6&q=emergent+abilities+of+large+language+models&qst=ir'},\n",
" {'query': 'language models privacy risks',\n",
" 'highlighted': ['privacy risks'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=7&q=language+models+privacy+risks&qst=ir'},\n",
" {'query': 'language model fine tuning',\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=5&q=large+language+models+machine+translation&qst=ib'},\n",
" {'query': 'large language models zero shot',\n",
" 'highlighted': ['zero shot'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=6&q=large+language+models+zero+shot&qst=ib'},\n",
" {'query': 'large language models chatgpt',\n",
" 'highlighted': ['chatgpt'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=7&q=large+language+models+chatgpt&qst=ib'},\n",
" {'query': 'fine tuning large language models',\n",
" 'highlighted': ['fine tuning'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=8&q=language+model+fine+tuning&qst=ir'}],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=8&q=fine+tuning+large+language+models&qst=ib'}],\n",
" 'pagination': {'current': 1,\n",
" 'next': 'https://scholar.google.com/scholar?start=10&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'other_pages': {'2': 'https://scholar.google.com/scholar?start=10&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '3': 'https://scholar.google.com/scholar?start=20&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '4': 'https://scholar.google.com/scholar?start=30&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '5': 'https://scholar.google.com/scholar?start=40&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '6': 'https://scholar.google.com/scholar?start=50&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '7': 'https://scholar.google.com/scholar?start=60&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '8': 'https://scholar.google.com/scholar?start=70&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '9': 'https://scholar.google.com/scholar?start=80&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '10': 'https://scholar.google.com/scholar?start=90&q=Large+Language+Models&hl=en&as_sdt=0,33'}}}\n"
" 'next': 'https://scholar.google.com/scholar?start=10&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" 'other_pages': {'2': 'https://scholar.google.com/scholar?start=10&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '3': 'https://scholar.google.com/scholar?start=20&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '4': 'https://scholar.google.com/scholar?start=30&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '5': 'https://scholar.google.com/scholar?start=40&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '6': 'https://scholar.google.com/scholar?start=50&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '7': 'https://scholar.google.com/scholar?start=60&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '8': 'https://scholar.google.com/scholar?start=70&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '9': 'https://scholar.google.com/scholar?start=80&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '10': 'https://scholar.google.com/scholar?start=90&q=Large+Language+Models&hl=en&as_sdt=0,5'}}}\n"
]
}
],
@@ -596,6 +646,14 @@
"results = search.results(\"Large Language Models\")\n",
"pprint.pp(results)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11ab5938-e298-471d-96fc-50405ffad35c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -614,7 +672,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.4"
}
},
"nbformat": 4,

View File

@@ -30,11 +30,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade --quiet python-steam-api python-decouple"
"%pip install --upgrade --quiet python-steam-api python-decouple steamspypi"
]
},
{
@@ -48,15 +56,15 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"STEAM_KEY\"] = \"xyz\"\n",
"os.environ[\"STEAM_ID\"] = \"123\"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"abc\""
"os.environ[\"STEAM_KEY\"] = \"\"\n",
"os.environ[\"STEAM_ID\"] = \"\"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\""
]
},
{
@@ -70,63 +78,83 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import AgentType, initialize_agent\n",
"from langchain_community.agent_toolkits.steam.toolkit import SteamToolkit\n",
"from langchain_community.utilities.steam import SteamWebAPIWrapper\n",
"from langchain_openai import OpenAI"
"\n",
"steam = SteamWebAPIWrapper()\n",
"tools = [steam.run]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"llm = OpenAI(temperature=0)\n",
"Steam = SteamWebAPIWrapper()\n",
"toolkit = SteamToolkit.from_steam_api_wrapper(Steam)\n",
"agent = initialize_agent(\n",
" toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
")"
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"can you give the information about the game Terraria?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" run (call_6vHAXSIL2MPugXxlv5uyf9Xk)\n",
" Call ID: call_6vHAXSIL2MPugXxlv5uyf9Xk\n",
" Args:\n",
" mode: get_games_details\n",
" game: Terraria\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: run\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to find the game details\n",
"Action: Get Games Details\n",
"Action Input: Terraria\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mThe id is: 105600\n",
"The id is: [105600]\n",
"The link is: https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13\n",
"The price is: $9.99\n",
"The summary of the game is: Dig, Fight, Explore, Build: The very world is at your fingertips as you fight for survival, fortune, and glory. Will you delve deep into cavernous expanses in search of treasure and raw materials with which to craft ever-evolving gear, machinery, and aesthetics? Perhaps you will choose instead to seek out ever-greater foes to test your mettle in combat? Maybe you will decide to construct your own city to house the host of mysterious allies you may encounter along your travels? In the World of Terraria, the choice is yours!Blending elements of classic action games with the freedom of sandbox-style creativity, Terraria is a unique gaming experience where both the journey and the destination are completely in the players control. The Terraria adventure is truly as unique as the players themselves! Are you up for the monumental task of exploring, creating, and defending a world of your own? Key features: Sandbox Play Randomly generated worlds Free Content Updates \n",
"The supported languages of the game are: English, French, Italian, German, Spanish - Spain, Polish, Portuguese - Brazil, Russian, Simplified Chinese\n",
"\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: Terraria is a game with an id of 105600, a link of https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13, a price of $9.99, a summary of \"Dig, Fight, Explore, Build: The very world is at your fingertips as you fight for survival, fortune, and glory. Will you delve deep into cavernous expanses in search of treasure and raw materials with which to craft ever-evolving gear, machinery, and aesthetics? Perhaps you will choose instead to seek out ever-greater foes to test your mettle in combat? Maybe you will decide to construct your own city to house the host of mysterious allies you may encounter along your travels? In the World of Terraria, the choice is yours!Blending elements of classic action games with the freedom of sandbox-style creativity, Terraria is a unique gaming experience where both the journey and the destination are completely in the players control. The Terraria adventure is truly as unique as the players themselves! Are you up for the monumental task of exploring, creating, and defending a\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"{'input': 'can you give the information about the game Terraria', 'output': 'Terraria is a game with an id of 105600, a link of https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13, a price of $9.99, a summary of \"Dig, Fight, Explore, Build: The very world is at your fingertips as you fight for survival, fortune, and glory. Will you delve deep into cavernous expanses in search of treasure and raw materials with which to craft ever-evolving gear, machinery, and aesthetics? Perhaps you will choose instead to seek out ever-greater foes to test your mettle in combat? Maybe you will decide to construct your own city to house the host of mysterious allies you may encounter along your travels? In the World of Terraria, the choice is yours!Blending elements of classic action games with the freedom of sandbox-style creativity, Terraria is a unique gaming experience where both the journey and the destination are completely in the players control. The Terraria adventure is truly as unique as the players themselves! Are you up for the monumental task of exploring, creating, and defending a'}\n"
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"Terraria is a game where you can dig, fight, explore, and build in a world that is totally at your fingertips. The game gives you the freedom to survive, seek fortune, and achieve glory. You can explore cavernous expanses in search of treasure and materials to craft various gear, machinery, and aesthetic items. Alternatively, you can challenge powerful foes or construct your own city to house mysterious allies you may encounter. The game blends classic action elements with sandbox-style creativity, offering a unique experience where your journey and destination are controlled by you.\n",
"\n",
"Key features of Terraria include sandbox play, randomly generated worlds, and free content updates.\n",
"\n",
"The game is priced at $9.99 and supports multiple languages including English, French, Italian, German, Spanish (Spain), Polish, Portuguese (Brazil), Russian, and Simplified Chinese.\n",
"\n",
"You can find more information and purchase it here: [Terraria on Steam](https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13).\n"
]
}
],
"source": [
"out = agent(\"can you give the information about the game Terraria\")\n",
"print(out)"
"events = agent.stream(\n",
" {\"messages\": [(\"user\", \"can you give the information about the game Terraria?\")]},\n",
" stream_mode=\"values\",\n",
")\n",
"for event in events:\n",
" event[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -145,7 +173,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.4"
}
},
"nbformat": 4,

View File

@@ -19,9 +19,9 @@
"## Overview\n",
"\n",
"### Integration details\n",
"| Class | Package | Serializable | [JS support](https://js.langchain.com/docs/integrations/tools/tavily_search) | Package latest |\n",
"| Class | Package | Serializable | [JS support](https://js.langchain.com/docs/integrations/tools/tavily_extract/) | Package latest |\n",
"|:--------------------------------------------------------------|:---------------------------------------------------------------| :---: | :---: | :---: |\n",
"| [TavilyExtract](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ✅ | | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-tavily?style=flat-square&label=%20) |\n",
"| [TavilyExtract](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ✅ | | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-tavily?style=flat-square&label=%20) |\n",
"\n",
"### Tool features\n",
"| [Returns artifact](/docs/how_to/tool_artifacts/) | Native async | Return data | Pricing |\n",

View File

@@ -20,7 +20,7 @@
"### Integration details\n",
"| Class | Package | Serializable | [JS support](https://js.langchain.com/docs/integrations/tools/tavily_search) | Package latest |\n",
"|:--------------------------------------------------------------|:---------------------------------------------------------------| :---: | :---: | :---: |\n",
"| [TavilySearch](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ✅ | | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-tavily?style=flat-square&label=%20) |\n",
"| [TavilySearch](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ✅ | | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-tavily?style=flat-square&label=%20) |\n",
"\n",
"### Tool features\n",
"| [Returns artifact](/docs/how_to/tool_artifacts/) | Native async | Return data | Pricing |\n",

File diff suppressed because one or more lines are too long

View File

@@ -17,10 +17,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "38717a85-2c3c-4452-a1c7-1ed4dea3da86",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade --quiet yfinance"
]
@@ -35,121 +43,136 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 2,
"id": "d137dd6c-d3d3-4813-af65-59eaaa6b3d76",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
"os.environ[\"OPENAI_API_KEY\"] = \"..\""
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "fc42f766-9ce6-4ba3-be6c-5ba8a345b0d3",
"execution_count": null,
"id": "af297977-4fc3-421f-9ce1-f62c1c5b026a",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
]
}
],
"source": [
"from langchain.agents import AgentType, initialize_agent\n",
"from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool\n",
"from langchain_openai import ChatOpenAI\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"llm = ChatOpenAI(temperature=0.0)\n",
"tools = [YahooFinanceNewsTool()]\n",
"agent_chain = initialize_agent(\n",
" tools,\n",
" llm,\n",
" agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
" verbose=True,\n",
")"
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "3d1614b4-508e-4689-84b1-2a387f80aeb1",
"execution_count": 4,
"id": "ac3cbec8-4135-4f5a-bb35-299730c000bd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What happened today with Microsoft stocks?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" yahoo_finance_news (call_s1Waj1rAoJ89CfxWX1RWDiWL)\n",
" Call ID: call_s1Waj1rAoJ89CfxWX1RWDiWL\n",
" Args:\n",
" query: MSFT\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: yahoo_finance_news\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mI should check the latest financial news about Microsoft stocks.\n",
"Action: yahoo_finance_news\n",
"Action Input: MSFT\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mMicrosoft (MSFT) Gains But Lags Market: What You Should Know\n",
"In the latest trading session, Microsoft (MSFT) closed at $328.79, marking a +0.12% move from the previous day.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI have the latest information on Microsoft stocks.\n",
"Final Answer: Microsoft (MSFT) closed at $328.79, with a +0.12% move from the previous day.\u001b[0m\n",
"Microsoft (MSFT), Meta Platforms (META) Reported “Home Run” Results: Dan Ives Recent Comments\n",
"Microsoft (MSFT) and Meta Platforms (META) delivered “home run” results yesterday, as the AI Revolution has not been slowed by the Trump administrations tariffs, Dan Ives, the Managing Director and Senior Equity Research Analyst at Wedbush Securities said on CNBC recently. Ives covers tech stocks. Tech Is Poised for a Comeback, Ives Indicates “The tech […]\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
"Today, Microsoft (MSFT) reported strong financial results, described as \"home run\" results by Dan Ives, Managing Director and Senior Equity Research Analyst at Wedbush Securities. Despite the Trump administrations tariffs, the AI Revolution driving tech stocks like Microsoft has not slowed down, indicating a positive outlook for the company and the tech sector overall.\n"
]
},
{
"data": {
"text/plain": [
"'Microsoft (MSFT) closed at $328.79, with a +0.12% move from the previous day.'"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent_chain.invoke(\n",
" \"What happened today with Microsoft stocks?\",\n",
")"
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"What happened today with Microsoft stocks?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "c899b64d-86a5-452c-b576-e94f485c27ea",
"execution_count": 5,
"id": "4496b06b-8b57-4fa8-9b86-4db407caa807",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"How does Microsoft feels today comparing with Nvidia?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" yahoo_finance_news (call_r9m4YxdEqWeXotkNgK8jGzeJ)\n",
" Call ID: call_r9m4YxdEqWeXotkNgK8jGzeJ\n",
" Args:\n",
" query: MSFT\n",
" yahoo_finance_news (call_fxj3AIKPB4MYuquvFFWrBD8B)\n",
" Call ID: call_fxj3AIKPB4MYuquvFFWrBD8B\n",
" Args:\n",
" query: NVDA\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: yahoo_finance_news\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mI should compare the current sentiment of Microsoft and Nvidia.\n",
"Action: yahoo_finance_news\n",
"Action Input: MSFT\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mMicrosoft (MSFT) Gains But Lags Market: What You Should Know\n",
"In the latest trading session, Microsoft (MSFT) closed at $328.79, marking a +0.12% move from the previous day.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI need to find the current sentiment of Nvidia as well.\n",
"Action: yahoo_finance_news\n",
"Action Input: NVDA\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI now know the current sentiment of both Microsoft and Nvidia.\n",
"Final Answer: I cannot compare the sentiment of Microsoft and Nvidia as I only have information about Microsoft.\u001b[0m\n",
"NVIDIA Corporation (NVDA): Among Ken Fishers Technology Stock Picks with Huge Upside Potential\n",
"We recently published an article titled Billionaire Ken Fishers 10 Technology Stock Picks with Huge Upside Potential. In this article, we are going to take a look at where NVIDIA Corporation (NASDAQ:NVDA) stands against the other technology stocks. Technology stocks have faced heightened volatility in 2025, with market sentiment swinging sharply in response to President Donald […]\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
"Nvidia (NVDA) Redesigns Chips to Sidestep U.S. Export Ban, Eyes June China Rollout\n",
"Nvidia plans China-specific AI chip revamp after new U.S. export limits\n",
"\n",
"Is NVIDIA (NVDA) the Best NASDAQ Stock to Buy According to Billionaires?\n",
"We recently published a list of 10 Best NASDAQ Stocks to Buy According to Billionaires. In this article, we are going to take a look at where NVIDIA Corporation (NASDAQ:NVDA) stands against other best NASDAQ stocks to buy according to billionaires. The latest market data shows that the US economy contracted at an annualized rate […]\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"Today, Microsoft (MSFT) is viewed positively with recent strong earnings reported, described as \"home run\" results, indicating confidence in its performance amid an ongoing AI revolution.\n",
"\n",
"Nvidia (NVDA) is also in focus with its strategic moves, such as redesigning AI chips to bypass U.S. export bans and targeting a China rollout. It is considered one of the technology stocks with significant upside potential, attracting attention from notable investors.\n",
"\n",
"In summary, both Microsoft and Nvidia have positive sentiments today, with Microsoft showing strong financial results and Nvidia making strategic advancements in AI technology and market positioning.\n"
]
},
{
"data": {
"text/plain": [
"'I cannot compare the sentiment of Microsoft and Nvidia as I only have information about Microsoft.'"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent_chain.invoke(\n",
" \"How does Microsoft feels today comparing with Nvidia?\",\n",
")"
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"How does Microsoft feels today comparing with Nvidia?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
@@ -162,7 +185,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 6,
"id": "7879b79c-b5c7-4a5d-8338-edda53ff41a6",
"metadata": {},
"outputs": [],
@@ -172,17 +195,17 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 7,
"id": "ac989456-33bc-4478-874e-98b9cb24d113",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'No news found for company that searched with NVDA ticker.'"
"'NVIDIA Corporation (NVDA): Among Ken Fishers Technology Stock Picks with Huge Upside Potential\\nWe recently published an article titled Billionaire Ken Fishers 10 Technology Stock Picks with Huge Upside Potential. In this article, we are going to take a look at where NVIDIA Corporation (NASDAQ:NVDA) stands against the other technology stocks. Technology stocks have faced heightened volatility in 2025, with market sentiment swinging sharply in response to President Donald […]\\n\\nNvidia (NVDA) Redesigns Chips to Sidestep U.S. Export Ban, Eyes June China Rollout\\nNvidia plans China-specific AI chip revamp after new U.S. export limits\\n\\nIs NVIDIA (NVDA) the Best NASDAQ Stock to Buy According to Billionaires?\\nWe recently published a list of 10 Best NASDAQ Stocks to Buy According to Billionaires. In this article, we are going to take a look at where NVIDIA Corporation (NASDAQ:NVDA) stands against other best NASDAQ stocks to buy according to billionaires. The latest market data shows that the US economy contracted at an annualized rate […]'"
]
},
"execution_count": 38,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -193,7 +216,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 8,
"id": "46c697aa-102e-48d4-9834-081671aad40a",
"metadata": {},
"outputs": [
@@ -201,11 +224,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Top Research Reports for Apple, Broadcom & Caterpillar\n",
"Today's Research Daily features new research reports on 16 major stocks, including Apple Inc. (AAPL), Broadcom Inc. (AVGO) and Caterpillar Inc. (CAT).\n",
"\n",
"Apple Stock on Pace for Worst Month of the Year\n",
"Apple (AAPL) shares are on pace for their worst month of the year, according to Dow Jones Market Data. The stock is down 4.8% so far in August, putting it on pace for its worst month since December 2022, when it fell 12%.\n"
"No news found for company that searched with AAPL ticker.\n"
]
}
],
@@ -239,7 +258,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.13.2"
}
},
"nbformat": 4,

View File

@@ -11,8 +11,8 @@
"datasets stored in Aerospike. This new service lives outside of Aerospike and\n",
"builds an index to perform those searches.\n",
"\n",
"This notebook showcases the functionality of the LangChain Aerospike VectorStore\n",
"integration.\n",
"This notebook showcases the functionality of the [LangChain Aerospike VectorStore\n",
"integration](https://github.com/aerospike/langchain-aerospike).\n",
"\n",
"## Install AVS\n",
"\n",
@@ -25,11 +25,11 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"AVS_HOST = \"<avs-ip>\"\n",
"AVS_HOST = \"<avs_ip>\"\n",
"AVS_PORT = 5000"
]
},
@@ -43,15 +43,25 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"!pip install --upgrade --quiet aerospike-vector-search==3.0.1 langchain-community sentence-transformers langchain"
"!pip install --upgrade --quiet aerospike-vector-search==4.2.0 langchain-aerospike langchain-community sentence-transformers langchain"
]
},
{
@@ -65,28 +75,32 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2024-05-10 17:28:17-- https://github.com/aerospike/aerospike-vector-search-examples/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
"Resolving github.com (github.com)... 140.82.116.4\n",
"Connecting to github.com (github.com)|140.82.116.4|:443... connected.\n",
"--2025-05-07 21:06:30-- https://github.com/aerospike/aerospike-vector-search-examples/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
"Resolving github.com (github.com)... 140.82.116.3\n",
"Connecting to github.com (github.com)|140.82.116.3|:443... connected.\n",
"HTTP request sent, awaiting response... 301 Moved Permanently\n",
"Location: https://github.com/aerospike/aerospike-vector/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz [following]\n",
"--2025-05-07 21:06:30-- https://github.com/aerospike/aerospike-vector/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
"Reusing existing connection to github.com:443.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://raw.githubusercontent.com/aerospike/aerospike-vector-search-examples/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz [following]\n",
"--2024-05-10 17:28:17-- https://raw.githubusercontent.com/aerospike/aerospike-vector-search-examples/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n",
"Location: https://raw.githubusercontent.com/aerospike/aerospike-vector/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz [following]\n",
"--2025-05-07 21:06:30-- https://raw.githubusercontent.com/aerospike/aerospike-vector/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 11597643 (11M) [application/octet-stream]\n",
"Saving to: quotes.csv.tgz\n",
"\n",
"quotes.csv.tgz 100%[===================>] 11.06M 1.94MB/s in 6.1s \n",
"quotes.csv.tgz 100%[===================>] 11.06M 12.7MB/s in 0.9s \n",
"\n",
"2024-05-10 17:28:23 (1.81 MB/s) - quotes.csv.tgz saved [11597643/11597643]\n",
"2025-05-07 21:06:32 (12.7 MB/s) - quotes.csv.tgz saved [11597643/11597643]\n",
"\n"
]
}
@@ -106,7 +120,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -132,14 +146,14 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content=\"quote: I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.\" metadata={'source': './quotes.csv', 'row': 0, 'author': 'Marilyn Monroe', 'category': 'attributed-no-source, best, life, love, mistakes, out-of-control, truth, worst'}\n"
"page_content='quote: I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.' metadata={'source': './quotes.csv', 'row': 0, 'author': 'Marilyn Monroe', 'category': 'attributed-no-source, best, life, love, mistakes, out-of-control, truth, worst'}\n"
]
}
],
@@ -158,178 +172,18 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "60662fc2676a46a2ac48fbf30d9c85fe",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"modules.json: 0%| | 0.00/349 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "319412217d3944488f135c8bf8bca73b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config_sentence_transformers.json: 0%| | 0.00/116 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "eb020ec2e2f4486294f85c490ef4a387",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"README.md: 0%| | 0.00/10.7k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "65d248263e4049bea4f6b554640a6aae",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"sentence_bert_config.json: 0%| | 0.00/53.0 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n"
"/var/folders/h5/lm2_c1xs3s32kwp11prnpftw0000gp/T/ipykernel_84638/3255399720.py:6: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-huggingface package and should be used instead. To use it run `pip install -U :class:`~langchain-huggingface` and import as `from :class:`~langchain_huggingface import HuggingFaceEmbeddings``.\n",
" embedder = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
"/Users/dwelch/Desktop/everything/projects/langchain/myfork/langchain/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c6b09a49fbd84c799ea28ace296406e3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config.json: 0%| | 0.00/612 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7e649688c67544d5af6bdd883c47d315",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model.safetensors: 0%| | 0.00/90.9M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "de447c7e4df1485ead14efae1faf96d6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/350 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "83ad1f289cd04f73aafca01a8e68e63b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2b612221e29e433cb50a54a6b838f5af",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1f5f0c29c58642478cd665731728dad0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/112 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dff1d16a5a6d4d20ac39adb5c9425cf6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"1_Pooling/config.json: 0%| | 0.00/190 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -352,7 +206,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -364,9 +218,9 @@
}
],
"source": [
"from aerospike_vector_search import AdminClient, Client, HostPort\n",
"from aerospike_vector_search import Client, HostPort\n",
"from aerospike_vector_search.types import VectorDistanceMetric\n",
"from langchain_community.vectorstores import Aerospike\n",
"from langchain_aerospike.vectorstores import Aerospike\n",
"\n",
"# Here we are using the AVS host and port you configured earlier\n",
"seed = HostPort(host=AVS_HOST, port=AVS_PORT)\n",
@@ -381,13 +235,10 @@
"VECTOR_KEY = \"vector\"\n",
"\n",
"client = Client(seeds=seed)\n",
"admin_client = AdminClient(\n",
" seeds=seed,\n",
")\n",
"index_exists = False\n",
"\n",
"# Check if the index already exists. If not, create it\n",
"for index in admin_client.index_list():\n",
"for index in client.index_list():\n",
" if index[\"id\"][\"namespace\"] == NAMESPACE and index[\"id\"][\"name\"] == INDEX_NAME:\n",
" index_exists = True\n",
" print(f\"{INDEX_NAME} already exists. Skipping creation\")\n",
@@ -395,7 +246,7 @@
"\n",
"if not index_exists:\n",
" print(f\"{INDEX_NAME} does not exist. Creating index\")\n",
" admin_client.index_create(\n",
" client.index_create(\n",
" namespace=NAMESPACE,\n",
" name=INDEX_NAME,\n",
" vector_field=VECTOR_KEY,\n",
@@ -409,8 +260,6 @@
" },\n",
" )\n",
"\n",
"admin_client.close()\n",
"\n",
"docstore = Aerospike.from_documents(\n",
" documents,\n",
" embedder,\n",
@@ -432,7 +281,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -440,31 +289,31 @@
"output_type": "stream",
"text": [
"~~~~ Document 0 ~~~~\n",
"auto-generated id: f53589dd-e3e0-4f55-8214-766ca8dc082f\n",
"auto-generated id: 4984b472-8a32-4552-b3eb-f03b31b68031\n",
"author: Carl Sagan, Cosmos\n",
"quote: The Cosmos is all that is or was or ever will be. Our feeblest contemplations of the Cosmos stir us -- there is a tingling in the spine, a catch in the voice, a faint sensation, as if a distant memory, of falling from a height. We know we are approaching the greatest of mysteries.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 1 ~~~~\n",
"auto-generated id: dde3e5d1-30b7-47b4-aab7-e319d14e1810\n",
"author: Elizabeth Gilbert\n",
"quote: The love that moves the sun and the other stars.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 2 ~~~~\n",
"auto-generated id: fd56575b-2091-45e7-91c1-9efff2fe5359\n",
"auto-generated id: 486c8d87-8dd7-450d-9008-d7549e680ffb\n",
"author: Renee Ahdieh, The Rose & the Dagger\n",
"quote: From the stars, to the stars.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 2 ~~~~\n",
"auto-generated id: 4b43b309-ce51-498c-b225-5254383b5b4a\n",
"author: Elizabeth Gilbert\n",
"quote: The love that moves the sun and the other stars.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 3 ~~~~\n",
"auto-generated id: 8567ed4e-885b-44a7-b993-e0caf422b3c9\n",
"auto-generated id: af784a10-f498-4570-bf81-2ffdca35440e\n",
"author: Dante Alighieri, Paradiso\n",
"quote: Love, that moves the sun and the other stars\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 4 ~~~~\n",
"auto-generated id: f868c25e-c54d-48cd-a5a8-14bf402f9ea8\n",
"auto-generated id: b45d5d5e-d818-4206-ae6b-b1d166ea3d43\n",
"author: Thich Nhat Hanh, Teachings on Love\n",
"quote: Through my love for you, I want to express my love for the whole cosmos, the whole of humanity, and all beings. By living with you, I want to learn to love everyone and all species. If I succeed in loving you, I will be able to love everyone and all species on Earth... This is the real message of love.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
@@ -502,7 +351,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -510,7 +359,7 @@
"output_type": "stream",
"text": [
"New IDs\n",
"['972846bd-87ae-493b-8ba3-a3d023c03948', '8171122e-cbda-4eb7-a711-6625b120893b', '53b54409-ac19-4d90-b518-d7c40bf5ee5d']\n"
"['adf8064e-9c0e-46e2-b193-169c36432f4c', 'cf65b5ed-a0f4-491a-86ad-dcacc23c2815', '2ef52efd-d9b7-4077-bc14-defdf0b7dd2f']\n"
]
}
],
@@ -552,7 +401,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -560,25 +409,25 @@
"output_type": "stream",
"text": [
"~~~~ Document 0 ~~~~\n",
"auto-generated id: 67d5b23f-b2d2-4872-80ad-5834ea08aa64\n",
"auto-generated id: 91e77b39-a528-40c6-a58a-486ae85f991a\n",
"author: John Grogan, Marley and Me: Life and Love With the World's Worst Dog\n",
"quote: Such short little lives our pets have to spend with us, and they spend most of it waiting for us to come home each day. It is amazing how much love and laughter they bring into our lives and even how much closer we become with each other because of them.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 1 ~~~~\n",
"auto-generated id: a9b28eb0-a21c-45bf-9e60-ab2b80e988d8\n",
"auto-generated id: c585b4ec-92b5-4579-948c-0529373abc2a\n",
"author: John Grogan, Marley and Me: Life and Love With the World's Worst Dog\n",
"quote: Dogs are great. Bad dogs, if you can really call them that, are perhaps the greatest of them all.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 2 ~~~~\n",
"auto-generated id: ee7434c8-2551-4651-8a22-58514980fb4a\n",
"auto-generated id: 5768b31c-fac4-4af7-84b4-fb11bbfcb590\n",
"author: Colleen Houck, Tiger's Curse\n",
"quote: He then put both hands on the door on either side of my head and leaned in close, pinning me against it. I trembled like a downy rabbit caught in the clutches of a wolf. The wolf came closer. He bent his head and began nuzzling my cheek. The problem was…I wanted the wolf to devour me.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 3 ~~~~\n",
"auto-generated id: 9170804c-a155-473b-ab93-8a561dd48f91\n",
"auto-generated id: 94f1b9fb-ad57-4f65-b470-7f49dd6c274c\n",
"author: Ray Bradbury\n",
"quote: Stuff your eyes with wonder,\" he said, \"live as if you'd drop dead in ten seconds. See the world. It's more fantastic than any dream made or paid for in factories. Ask no guarantees, ask for no security, there never was such an animal. And if there were, it would be related to the great sloth which hangs upside down in a tree all day every day, sleeping its life away. To hell with that,\" he said, \"shake the tree and knock the great sloth down on his ass.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
@@ -607,7 +456,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -615,25 +464,25 @@
"output_type": "stream",
"text": [
"~~~~ Document 0 ~~~~\n",
"auto-generated id: 2c1d6ee1-b742-45ea-bed6-24a1f655c849\n",
"auto-generated id: 6d9e67a6-0427-41e6-9e24-050518120d74\n",
"author: Roy T. Bennett, The Light in the Heart\n",
"quote: Never lose hope. Storms make people stronger and never last forever.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 1 ~~~~\n",
"auto-generated id: 5962c2cf-ffb5-4e03-9257-bdd630b5c7e9\n",
"auto-generated id: 7d426e59-7935-4bcf-a676-cbe8dd4860e7\n",
"author: Roy T. Bennett, The Light in the Heart\n",
"quote: Difficulties and adversities viciously force all their might on us and cause us to fall apart, but they are necessary elements of individual growth and reveal our true potential. We have got to endure and overcome them, and move forward. Never lose hope. Storms make people stronger and never last forever.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 2 ~~~~\n",
"auto-generated id: 3bbcc4ca-de89-4196-9a46-190a50bf6c47\n",
"auto-generated id: 6ec05e48-d162-440d-8819-001d2f3712f9\n",
"author: Vincent van Gogh, The Letters of Vincent van Gogh\n",
"quote: There is peace even in the storm\n",
"~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"~~~~ Document 3 ~~~~\n",
"auto-generated id: 37d8cf02-fc2f-429d-b2b6-260a05286108\n",
"auto-generated id: d3c3de59-4da4-4ae6-8f6d-83ed905dd320\n",
"author: Edwin Morgan, A Book of Lives\n",
"quote: Valentine WeatherKiss me with rain on your eyelashes,come on, let us sway together,under the trees, and to hell with thunder.\n",
"~~~~~~~~~~~~~~~~~~~~\n",
@@ -665,7 +514,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -684,7 +533,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -698,7 +547,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.11.12"
}
},
"nbformat": 4,

View File

@@ -7,9 +7,11 @@
"source": [
"# Astra DB Vector Store\n",
"\n",
"This page provides a quickstart for using [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as a Vector Store.\n",
"This page provides a quickstart for using Astra DB as a Vector Store.\n",
"\n",
"> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available through an easy-to-use JSON API.\n",
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
"> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
"> through an easy-to-use JSON API.\n",
"\n",
"## Setup"
]
@@ -19,6 +21,8 @@
"id": "dbe7c156-0413-47e3-9237-4769c4248869",
"metadata": {},
"source": [
"### Dependencies\n",
"\n",
"Use of the integration requires the `langchain-astradb` partner package:"
]
},
@@ -26,10 +30,15 @@
"cell_type": "code",
"execution_count": null,
"id": "8d00fcf4-9798-4289-9214-d9734690adfc",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"pip install -qU \"langchain-astradb>=0.3.3\""
"!pip install \\\n",
" \"langchain>=0.3.23,<0.4\" \\\n",
" \"langchain-core>=0.3.52,<0.4\" \\\n",
" \"langchain-astradb>=0.6,<0.7\""
]
},
{
@@ -41,30 +50,40 @@
"\n",
"In order to use the AstraDB vector store, you must first head to the [AstraDB website](https://astra.datastax.com), create an account, and then create a new database - the initialization might take a few minutes. \n",
"\n",
"Once the database has been initialized, you should [create an application token](https://docs.datastax.com/en/astra-db-serverless/administration/manage-application-tokens.html#generate-application-token) and save it for later use. \n",
"Once the database has been initialized, retrieve your [connection secrets](https://docs.datastax.com/en/astra-db-serverless/get-started/quickstart.html#create-a-database-and-store-your-credentials), which you'll need momentarily. These are:\n",
"- an **`API Endpoint`**, such as `\"https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com/\"`\n",
"- and a **`Database Token`**, e.g. `\"AstraCS:aBcD123......\"`\n",
"\n",
"You will also want to copy the `API Endpoint` from the `Database Details` and store that in the `ASTRA_DB_API_ENDPOINT` variable.\n",
"\n",
"You may optionally provide a namespace, which you can manage from the `Data Explorer` tab of your database dashboard. If you don't wish to set a namespace, you can leave the `getpass` prompt for `ASTRA_DB_NAMESPACE` empty."
"You may optionally provide a **`keyspace`** (called \"namespace\" in the LangChain components), which you can manage from the `Data Explorer` tab of your database dashboard. If you wish, you can leave it empty in the prompt below and fall back to a default keyspace."
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 1,
"id": "b7843c22",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"ASTRA_DB_API_ENDPOINT = https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",
"ASTRA_DB_APPLICATION_TOKEN = ········\n",
"(optional) ASTRA_DB_KEYSPACE = \n"
]
}
],
"source": [
"import getpass\n",
"\n",
"ASTRA_DB_API_ENDPOINT = getpass.getpass(\"ASTRA_DB_API_ENDPOINT = \")\n",
"ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n",
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \").strip()\n",
"ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \").strip()\n",
"\n",
"desired_namespace = getpass.getpass(\"ASTRA_DB_NAMESPACE = \")\n",
"if desired_namespace:\n",
" ASTRA_DB_NAMESPACE = desired_namespace\n",
"desired_keyspace = input(\"(optional) ASTRA_DB_KEYSPACE = \").strip()\n",
"if desired_keyspace:\n",
" ASTRA_DB_KEYSPACE = desired_keyspace\n",
"else:\n",
" ASTRA_DB_NAMESPACE = None"
" ASTRA_DB_KEYSPACE = None"
]
},
{
@@ -77,7 +96,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "3cb739c0",
"metadata": {},
"outputs": [],
@@ -93,28 +112,46 @@
"source": [
"## Initialization\n",
"\n",
"There are two ways to create an Astra DB vector store, which differ in how the embeddings are computed.\n",
"There are various ways to create an Astra DB vector store:\n",
"\n",
"#### Method 1: Explicit embeddings\n",
"\n",
"You can separately instantiate a `langchain_core.embeddings.Embeddings` class and pass it to the `AstraDBVectorStore` constructor, just like with most other LangChain vector stores.\n",
"\n",
"#### Method 2: Integrated embedding computation\n",
"#### Method 2: Server-side embeddings ('vectorize')\n",
"\n",
"Alternatively, you can use the [Vectorize](https://www.datastax.com/blog/simplifying-vector-embedding-generation-with-astra-vectorize) feature of Astra DB and simply specify the name of a supported embedding model when creating the store. The embedding computations are entirely handled within the database. (To proceed with this method, you must have enabled the desired embedding integration for your database, as described [in the docs](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html).)\n",
"Alternatively, you can use the [server-side embedding computation](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html) feature of Astra DB ('vectorize') and simply specify an embedding model when creating the server infrastructure for the store. The embedding computations will then be entirely handled within the database in subsequent read and write operations. (To proceed with this method, you must have enabled the desired embedding integration for your database, as described [in the docs](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html).)\n",
"\n",
"### Explicit Embedding Initialization\n",
"#### Method 3: Auto-detect from a pre-existing collection\n",
"\n",
"Below, we instantiate our vector store using the explicit embedding class:\n",
"You may already have a [collection](https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html) in your Astra DB, possibly pre-populated with data through other means (e.g. via the Astra UI or a third-party application), and just want to start querying it within LangChain. In this case, the right approach is to enable the `autodetect_collection` mode in the vector store constructor and let the class figure out the details. (Of course, if your collection has no 'vectorize', you still need to provide an `Embeddings` object).\n",
"\n",
"#### A note on \"hybrid search\"\n",
"\n",
"Astra DB vector stores support metadata search in vector searches; furthermore, version 0.6 introduced full support for _hybrid search_ through the [findAndRerank](https://docs.datastax.com/en/astra-db-serverless/api-reference/document-methods/find-and-rerank.html) database primitive: documents are retrieved from both a vector-similarity _and_ a keyword-based (\"lexical\") search, and are then merged through a reranker model. This search strategy, entirely handled on server-side, can boost the accuracy of your results, thus improving the quality of your RAG application. Whenever available, hybrid search is used automatically by the vector store (though you can exert manual control over it if you wish to do so).\n",
"\n",
"#### Additional information\n",
"\n",
"The `AstraDBVectorStore` can be configured in many ways; see the [API Reference](https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html) for a full guide covering e.g. asynchronous initialization; non-Astra-DB databases; custom indexing allow-/deny-lists; manual hybrid-search control; and much more."
]
},
{
"cell_type": "markdown",
"id": "8d7e33e0-f948-47b5-a9c2-6407fdde170e",
"metadata": {},
"source": [
"### Explicit embedding initialization (method 1)\n",
"\n",
"Instantiate our vector store using an explicit embedding class:\n",
"\n",
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
"\n",
"<EmbeddingTabs/>\n"
"<EmbeddingTabs/>"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 3,
"id": "d71a1dcb",
"metadata": {},
"outputs": [],
@@ -128,19 +165,19 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 4,
"id": "0b32730d-176e-414c-9d91-fd3644c54211",
"metadata": {},
"outputs": [],
"source": [
"from langchain_astradb import AstraDBVectorStore\n",
"\n",
"vector_store = AstraDBVectorStore(\n",
"vector_store_explicit_embeddings = AstraDBVectorStore(\n",
" collection_name=\"astra_vector_langchain\",\n",
" embedding=embeddings,\n",
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
" namespace=ASTRA_DB_NAMESPACE,\n",
" namespace=ASTRA_DB_KEYSPACE,\n",
")"
]
},
@@ -149,26 +186,26 @@
"id": "84a1fe85-a42c-4f15-92e1-f79f1dd43ea2",
"metadata": {},
"source": [
"### Integrated Embedding Initialization\n",
"### Server-side embedding initialization (\"vectorize\", method 2)\n",
"\n",
"Here it is assumed that you have\n",
"In this example code, it is assumed that you have\n",
"\n",
"- Enabled the OpenAI integration in your Astra DB organization,\n",
"- Added an API Key named `\"OPENAI_API_KEY\"` to the integration, and scoped it to the database you are using.\n",
"\n",
"For more details on how to do this, please consult the [documentation](https://docs.datastax.com/en/astra-db-serverless/integrations/embedding-providers/openai.html)."
"For more details, including instructions to switch provider/model, please consult the [documentation](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html)."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "9d18455d-3fa6-4f9e-b687-3a2bc71c9a23",
"metadata": {},
"outputs": [],
"source": [
"from astrapy.info import CollectionVectorServiceOptions\n",
"from astrapy.info import VectorServiceOptions\n",
"\n",
"openai_vectorize_options = CollectionVectorServiceOptions(\n",
"openai_vectorize_options = VectorServiceOptions(\n",
" provider=\"openai\",\n",
" model_name=\"text-embedding-3-small\",\n",
" authentication={\n",
@@ -176,125 +213,183 @@
" },\n",
")\n",
"\n",
"vector_store_integrated = AstraDBVectorStore(\n",
" collection_name=\"astra_vector_langchain_integrated\",\n",
"vector_store_integrated_embeddings = AstraDBVectorStore(\n",
" collection_name=\"astra_vectorize_langchain\",\n",
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
" namespace=ASTRA_DB_NAMESPACE,\n",
" namespace=ASTRA_DB_KEYSPACE,\n",
" collection_vector_service_options=openai_vectorize_options,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "24508a60-9591-4b24-a9b7-ecc90ed71b68",
"metadata": {},
"source": [
"### Auto-detect initialization (method 3)\n",
"\n",
"You can use this pattern if the collection already exists on the database and your `AstraDBVectorStore` needs to use it (for reads and writes). The LangChain component will inspect the collection and figure out the details.\n",
"\n",
"This is the recommended approach if the collection has been created and -- most importantly -- populated by tools other than LangChain, for example if the data has been ingested through the Astra DB Web interface.\n",
"\n",
"Auto-detect mode cannot coexist with _collection_ settings (such as the similarity metric and such); on the other hand, if no server-side embeddings are employed, one still needs to pass an `Embeddings` object to the constructor.\n",
"\n",
"In the following example code, we will \"auto-detect\" the very same collection that was created by method 2 above (\"vectorize\"). Hence, no `Embeddings` object needs to be supplied."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "683b0f6e-884f-4a09-bc3a-454bb1eefd30",
"metadata": {},
"outputs": [],
"source": [
"vector_store_autodetected = AstraDBVectorStore(\n",
" collection_name=\"astra_vectorize_langchain\",\n",
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
" namespace=ASTRA_DB_KEYSPACE,\n",
" autodetect_collection=True,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "fbcfe8e8-2f4e-4fc7-a332-7a2fa2c401bf",
"metadata": {},
"source": [
"## Manage vector store\n",
"\n",
"Once you have created your vector store, interact with it by adding and deleting different items.\n",
"\n",
"All interactions with the vector store proceed regardless of the initialization method: please **adapt the following cell**, if you desire, to select a vector store you have created and want to put to test."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "54d63f59-1e6b-49b4-a7c1-ac7717c92ac0",
"metadata": {},
"outputs": [],
"source": [
"# If desired, uncomment a different line here:\n",
"\n",
"# vector_store = vector_store_explicit_embeddings\n",
"vector_store = vector_store_integrated_embeddings\n",
"# vector_store = vector_store_autodetected"
]
},
{
"cell_type": "markdown",
"id": "d3796b39",
"metadata": {},
"source": [
"## Manage vector store\n",
"\n",
"Once you have created your vector store, we can interact with it by adding and deleting different items.\n",
"\n",
"### Add items to vector store\n",
"\n",
"We can add items to our vector store by using the `add_documents` function."
"Add documents to the vector store by using the `add_documents` method.\n",
"\n",
"_The \"id\" field can be supplied separately, in a matching `ids=[...]` parameter to `add_documents`, or even left out entirely to let the store generate IDs._"
]
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 8,
"id": "afb3e155",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[UUID('89a5cea1-5f3d-47c1-89dc-7e36e12cf4de'),\n",
" UUID('d4e78c48-f954-4612-8a38-af22923ba23b'),\n",
" UUID('058e4046-ded0-4fc1-b8ac-60e5a5f08ea0'),\n",
" UUID('50ab2a9a-762c-4b78-b102-942a86d77288'),\n",
" UUID('1da5a3c1-ba51-4f2f-aaaf-79a8f5011ce3'),\n",
" UUID('f3055d9e-2eb1-4d25-838e-2c70548f91b5'),\n",
" UUID('4bf0613d-08d0-4fbc-a43c-4955e4c9e616'),\n",
" UUID('18008625-8fd4-45c2-a0d7-92a2cde23dbc'),\n",
" UUID('c712e06f-790b-4fd4-9040-7ab3898965d0'),\n",
" UUID('a9b84820-3445-4810-a46c-e77b76ab85bc')]"
"['entry_00',\n",
" 'entry_01',\n",
" 'entry_02',\n",
" 'entry_03',\n",
" 'entry_04',\n",
" 'entry_05',\n",
" 'entry_06',\n",
" 'entry_07',\n",
" 'entry_08',\n",
" 'entry_09',\n",
" 'entry_10']"
]
},
"execution_count": 23,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from uuid import uuid4\n",
"\n",
"from langchain_core.documents import Document\n",
"\n",
"document_1 = Document(\n",
" page_content=\"I had chocolate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"document_2 = Document(\n",
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
" metadata={\"source\": \"news\"},\n",
")\n",
"\n",
"document_3 = Document(\n",
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"document_4 = Document(\n",
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
" metadata={\"source\": \"news\"},\n",
")\n",
"\n",
"document_5 = Document(\n",
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"document_6 = Document(\n",
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
" metadata={\"source\": \"website\"},\n",
")\n",
"\n",
"document_7 = Document(\n",
" page_content=\"The top 10 soccer players in the world right now.\",\n",
" metadata={\"source\": \"website\"},\n",
")\n",
"\n",
"document_8 = Document(\n",
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"document_9 = Document(\n",
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
" metadata={\"source\": \"news\"},\n",
")\n",
"\n",
"document_10 = Document(\n",
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"documents = [\n",
" document_1,\n",
" document_2,\n",
" document_3,\n",
" document_4,\n",
" document_5,\n",
" document_6,\n",
" document_7,\n",
" document_8,\n",
" document_9,\n",
" document_10,\n",
"documents_to_insert = [\n",
" Document(\n",
" page_content=\"ZYX, just another tool in the world, is actually my agent-based superhero\",\n",
" metadata={\"source\": \"tweet\"},\n",
" id=\"entry_00\",\n",
" ),\n",
" Document(\n",
" page_content=\"I had chocolate chip pancakes and scrambled eggs \"\n",
" \"for breakfast this morning.\",\n",
" metadata={\"source\": \"tweet\"},\n",
" id=\"entry_01\",\n",
" ),\n",
" Document(\n",
" page_content=\"The weather forecast for tomorrow is cloudy and \"\n",
" \"overcast, with a high of 62 degrees.\",\n",
" metadata={\"source\": \"news\"},\n",
" id=\"entry_02\",\n",
" ),\n",
" Document(\n",
" page_content=\"Building an exciting new project with LangChain \"\n",
" \"- come check it out!\",\n",
" metadata={\"source\": \"tweet\"},\n",
" id=\"entry_03\",\n",
" ),\n",
" Document(\n",
" page_content=\"Robbers broke into the city bank and stole \"\n",
" \"$1 million in cash.\",\n",
" metadata={\"source\": \"news\"},\n",
" id=\"entry_04\",\n",
" ),\n",
" Document(\n",
" page_content=\"Thanks to her sophisticated language skills, the agent \"\n",
" \"managed to extract strategic information all right.\",\n",
" metadata={\"source\": \"tweet\"},\n",
" id=\"entry_05\",\n",
" ),\n",
" Document(\n",
" page_content=\"Is the new iPhone worth the price? Read this \"\n",
" \"review to find out.\",\n",
" metadata={\"source\": \"website\"},\n",
" id=\"entry_06\",\n",
" ),\n",
" Document(\n",
" page_content=\"The top 10 soccer players in the world right now.\",\n",
" metadata={\"source\": \"website\"},\n",
" id=\"entry_07\",\n",
" ),\n",
" Document(\n",
" page_content=\"LangGraph is the best framework for building stateful, \"\n",
" \"agentic applications!\",\n",
" metadata={\"source\": \"tweet\"},\n",
" id=\"entry_08\",\n",
" ),\n",
" Document(\n",
" page_content=\"The stock market is down 500 points today due to \"\n",
" \"fears of a recession.\",\n",
" metadata={\"source\": \"news\"},\n",
" id=\"entry_09\",\n",
" ),\n",
" Document(\n",
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
" metadata={\"source\": \"tweet\"},\n",
" id=\"entry_10\",\n",
" ),\n",
"]\n",
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
"\n",
"vector_store.add_documents(documents=documents, ids=uuids)"
"\n",
"vector_store.add_documents(documents=documents_to_insert)"
]
},
{
@@ -304,12 +399,12 @@
"source": [
"### Delete items from vector store\n",
"\n",
"We can delete items from our vector store by ID by using the `delete` function."
"Delete items by ID by using the `delete` function."
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 9,
"id": "d3f69315",
"metadata": {},
"outputs": [
@@ -319,13 +414,13 @@
"True"
]
},
"execution_count": 24,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vector_store.delete(ids=uuids[-1])"
"vector_store.delete(ids=[\"entry_10\", \"entry_02\"])"
]
},
{
@@ -333,20 +428,20 @@
"id": "d12e1a07",
"metadata": {},
"source": [
"## Query vector store\n",
"## Query the vector store\n",
"\n",
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
"Once the vector store is created and populated, you can query it (e.g. as part of your chain or agent).\n",
"\n",
"### Query directly\n",
"\n",
"#### Similarity search\n",
"\n",
"Performing a simple similarity search with filtering on metadata can be done as follows:"
"Search for documents similar to a provided text, with additional metadata filters if desired:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 10,
"id": "770b3467",
"metadata": {},
"outputs": [
@@ -354,19 +449,20 @@
"name": "stdout",
"output_type": "stream",
"text": [
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
"* \"Building an exciting new project with LangChain - come check it out!\", metadata={'source': 'tweet'}\n",
"* \"LangGraph is the best framework for building stateful, agentic applications!\", metadata={'source': 'tweet'}\n",
"* \"Thanks to her sophisticated language skills, the agent managed to extract strategic information all right.\", metadata={'source': 'tweet'}\n"
]
}
],
"source": [
"results = vector_store.similarity_search(\n",
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
" k=2,\n",
" k=3,\n",
" filter={\"source\": \"tweet\"},\n",
")\n",
"for res in results:\n",
" print(f\"* {res.page_content} [{res.metadata}]\")"
" print(f'* \"{res.page_content}\", metadata={res.metadata}')"
]
},
{
@@ -376,12 +472,12 @@
"source": [
"#### Similarity search with score\n",
"\n",
"You can also search with score:"
"You can return the similarity score as well:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 11,
"id": "5924309a",
"metadata": {},
"outputs": [
@@ -389,16 +485,69 @@
"name": "stdout",
"output_type": "stream",
"text": [
"* [SIM=0.776585] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
"* [SIM=0.71] \"Building an exciting new project with LangChain - come check it out!\", metadata={'source': 'tweet'}\n",
"* [SIM=0.70] \"LangGraph is the best framework for building stateful, agentic applications!\", metadata={'source': 'tweet'}\n",
"* [SIM=0.61] \"Thanks to her sophisticated language skills, the agent managed to extract strategic information all right.\", metadata={'source': 'tweet'}\n"
]
}
],
"source": [
"results = vector_store.similarity_search_with_score(\n",
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
" k=3,\n",
" filter={\"source\": \"tweet\"},\n",
")\n",
"for res, score in results:\n",
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
" print(f'* [SIM={score:.2f}] \"{res.page_content}\", metadata={res.metadata}')"
]
},
{
"cell_type": "markdown",
"id": "73b8f418-91a7-46d0-91c3-3c76e9586193",
"metadata": {},
"source": [
"#### Specify a different keyword query (requires hybrid search)\n",
"\n",
"> Note: this cell can be run only if the collection supports the [find-and-rerank](https://docs.datastax.com/en/astra-db-serverless/api-reference/document-methods/find-and-rerank.html) command and if the vector store is aware of this fact.\n",
"\n",
"If the vector store is using a hybrid-enabled collection and has detected this fact, by default it will use that capability when running searches.\n",
"\n",
"In that case, the same query text is used for both the vector-similarity and the lexical-based retrieval steps in the find-and-rerank process, _unless you explicitly provide a different query for the latter_:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e282a48b-081a-4d94-9483-33407e8d6da7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* \"Building an exciting new project with LangChain - come check it out!\", metadata={'source': 'tweet'}\n",
"* \"LangGraph is the best framework for building stateful, agentic applications!\", metadata={'source': 'tweet'}\n",
"* \"ZYX, just another tool in the world, is actually my agent-based superhero\", metadata={'source': 'tweet'}\n"
]
}
],
"source": [
"results = vector_store_autodetected.similarity_search(\n",
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
" k=3,\n",
" filter={\"source\": \"tweet\"},\n",
" lexical_query=\"agent\",\n",
")\n",
"for res in results:\n",
" print(f'* \"{res.page_content}\", metadata={res.metadata}')"
]
},
{
"cell_type": "markdown",
"id": "60688e8c-d74d-4921-b213-b48d88600f95",
"metadata": {},
"source": [
"_The above example hardcodes the \"autodetected\" vector store, which has surely inspected the collection and figured out if hybrid is available. Another option is to explicitly supply hybrid-search parameters to the constructor (refer to the API Reference for more details/examples)._"
]
},
{
@@ -408,7 +557,9 @@
"source": [
"#### Other search methods\n",
"\n",
"There are a variety of other search methods that are not covered in this notebook, such as MMR search or searching by vector. For a full list of the search abilities available for `AstraDBVectorStore` check out the [API reference](https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html)."
"There are a variety of other search methods that are not covered in this notebook, such as MMR search and search by vector.\n",
"\n",
"For a full list of the search modes available in `AstraDBVectorStore` check out the [API reference](https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html)."
]
},
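As an illustration of the MMR mode mentioned above, here is a minimal sketch of a `max_marginal_relevance_search` call; the query text and tuning values are illustrative, not taken from this notebook:

```python
# Minimal MMR sketch (illustrative values): fetch a larger candidate pool,
# then re-rank it to balance relevance against diversity.
results = vector_store.max_marginal_relevance_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,
    fetch_k=20,       # candidates retrieved before diversification
    lambda_mult=0.5,  # 0 = maximal diversity, 1 = pure relevance
    filter={"source": "tweet"},
)
for res in results:
    print(f'* "{res.page_content}", metadata={res.metadata}')
```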
{
@@ -418,24 +569,24 @@
"source": [
"### Query by turning into retriever\n",
"\n",
"You can also transform the vector store into a retriever for easier usage in your chains. \n",
"You can also make the vector store into a retriever, for easier usage in your chains. \n",
"\n",
"Here is how to transform your vector store into a retriever and then invoke the retreiever with a simple query and filter."
"Transform the vector store into a retriever and invoke it with a simple query + metadata filter:"
]
},
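The source of the retriever cell lies outside the changed lines shown below; as a minimal sketch of what such an invocation typically looks like (the query text and score threshold are illustrative assumptions):

```python
# Illustrative sketch of a retriever invocation with a metadata filter.
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 1, "score_threshold": 0.5},
)
retriever.invoke("Stealing from the bank is a crime", filter={"source": "news"})
```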
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 13,
"id": "dcee50e6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
"[Document(id='entry_04', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
]
},
"execution_count": 17,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -490,7 +641,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "fd405a13-6f71-46fa-87e6-167238e9c25e",
"metadata": {},
"outputs": [],
@@ -505,7 +656,7 @@
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `AstraDBVectorStore` features and configurations head to the API reference: https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html"
"For detailed documentation of all `AstraDBVectorStore` features and configurations, consult the [API reference](https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html)."
]
}
],
@@ -525,7 +676,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.12.0"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,450 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7679dd7b-7ed4-4755-a499-824deadba708",
"metadata": {},
"source": [
"# Gel \n",
"\n",
"> An implementation of LangChain vectorstore abstraction using `gel` as the backend.\n",
"\n",
"[Gel](https://www.geldata.com/) is an open-source PostgreSQL data layer optimized for fast development to production cycle. It comes with a high-level strictly typed graph-like data model, composable hierarchical query language, full SQL support, migrations, Auth and AI modules.\n",
"\n",
"The code lives in an integration package called [langchain-gel](https://github.com/geldata/langchain-gel).\n",
"\n",
"## Setup\n",
"\n",
"First install relevant packages:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92df32f0",
"metadata": {},
"outputs": [],
"source": [
"! pip install -qU gel langchain-gel "
]
},
{
"cell_type": "markdown",
"id": "68ef6ebb",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"In order to use Gel as a backend for your `VectorStore`, you're going to need a working Gel instance.\n",
"Fortunately, it doesn't have to involve Docker containers or anything complicated, unless you want to!\n",
"\n",
"To set up a local instance, run:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b79938d3",
"metadata": {},
"outputs": [],
"source": [
"! gel project init --non-interactive"
]
},
{
"cell_type": "markdown",
"id": "08e79230",
"metadata": {},
"source": [
"If you are using [Gel Cloud](https://cloud.geldata.com/) (and you should!), add one more argument to that command:\n",
"\n",
"```bash\n",
"gel project init --server-instance <org-name>/<instance-name>\n",
"```\n",
"\n",
"For a comprehensive list of ways to run Gel, take a look at [Running Gel](https://docs.geldata.com/reference/running) section of the reference docs.\n",
"\n",
"### Set up the schema\n",
"\n",
"[Gel schema](https://docs.geldata.com/reference/datamodel) is an explicit high-level description of your application's data model. \n",
"Aside from enabling you to define exactly how your data is going to be laid out, it drives Gel's many powerful features such as links, access policies, functions, triggers, constraints, indexes, and more.\n",
"\n",
"The LangChain's `VectorStore` expects the following layout for the schema:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "9a7edd58",
"metadata": {},
"outputs": [],
"source": [
"schema_content = \"\"\"\n",
"using extension pgvector;\n",
" \n",
"module default {\n",
" scalar type EmbeddingVector extending ext::pgvector::vector<1536>;\n",
"\n",
" type Record {\n",
" required collection: str;\n",
" text: str;\n",
" embedding: EmbeddingVector; \n",
" external_id: str {\n",
" constraint exclusive;\n",
" };\n",
" metadata: json;\n",
"\n",
" index ext::pgvector::hnsw_cosine(m := 16, ef_construction := 128)\n",
" on (.embedding)\n",
" } \n",
"}\n",
"\"\"\".strip()\n",
"\n",
"with open(\"dbschema/default.gel\", \"w\") as f:\n",
" f.write(schema_content)"
]
},
{
"cell_type": "markdown",
"id": "90320ef1",
"metadata": {},
"source": [
"In order to apply schema changes to the database, run a migration using Gel's [migration mechanism](https://docs.geldata.com/reference/datamodel/migrations):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cdff483e",
"metadata": {},
"outputs": [],
"source": [
"! gel migration create --non-interactive\n",
"! gel migrate"
]
},
{
"cell_type": "markdown",
"id": "b2290ef2",
"metadata": {},
"source": [
"From this point onward, `GelVectorStore` can be used as a drop-in replacement for any other vectorstore available in LangChain."
]
},
{
"cell_type": "markdown",
"id": "ec44dfcc",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
"\n",
"<EmbeddingTabs/>\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "94f5c129",
"metadata": {},
"outputs": [],
"source": [
"# | output: false\n",
"# | echo: false\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "979a65bd-742f-4b0d-be1e-c0baae245ec6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain_gel import GelVectorStore\n",
"\n",
"vector_store = GelVectorStore(\n",
" embeddings=embeddings,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "61a224a1-d70b-4daf-86ba-ab6e43c08b50",
"metadata": {},
"source": [
"## Manage vector store\n",
"\n",
"### Add items to vector store\n",
"\n",
"Note that adding documents by ID will over-write any existing documents that match that ID."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88a288cc-ffd4-4800-b011-750c72b9fd10",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain_core.documents import Document\n",
"\n",
"docs = [\n",
" Document(\n",
" page_content=\"there are cats in the pond\",\n",
" metadata={\"id\": \"1\", \"location\": \"pond\", \"topic\": \"animals\"},\n",
" ),\n",
" Document(\n",
" page_content=\"ducks are also found in the pond\",\n",
" metadata={\"id\": \"2\", \"location\": \"pond\", \"topic\": \"animals\"},\n",
" ),\n",
" Document(\n",
" page_content=\"fresh apples are available at the market\",\n",
" metadata={\"id\": \"3\", \"location\": \"market\", \"topic\": \"food\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the market also sells fresh oranges\",\n",
" metadata={\"id\": \"4\", \"location\": \"market\", \"topic\": \"food\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the new art exhibit is fascinating\",\n",
" metadata={\"id\": \"5\", \"location\": \"museum\", \"topic\": \"art\"},\n",
" ),\n",
" Document(\n",
" page_content=\"a sculpture exhibit is also at the museum\",\n",
" metadata={\"id\": \"6\", \"location\": \"museum\", \"topic\": \"art\"},\n",
" ),\n",
" Document(\n",
" page_content=\"a new coffee shop opened on Main Street\",\n",
" metadata={\"id\": \"7\", \"location\": \"Main Street\", \"topic\": \"food\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the book club meets at the library\",\n",
" metadata={\"id\": \"8\", \"location\": \"library\", \"topic\": \"reading\"},\n",
" ),\n",
" Document(\n",
" page_content=\"the library hosts a weekly story time for kids\",\n",
" metadata={\"id\": \"9\", \"location\": \"library\", \"topic\": \"reading\"},\n",
" ),\n",
" Document(\n",
" page_content=\"a cooking class for beginners is offered at the community center\",\n",
" metadata={\"id\": \"10\", \"location\": \"community center\", \"topic\": \"classes\"},\n",
" ),\n",
"]\n",
"\n",
"vector_store.add_documents(docs, ids=[doc.metadata[\"id\"] for doc in docs])"
]
},
{
"cell_type": "markdown",
"id": "0c712fa3",
"metadata": {},
"source": [
"### Delete items from vector store"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a5b2b71f-49eb-407d-b03a-dea4c0a517d6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"vector_store.delete(ids=[\"3\"])"
]
},
{
"cell_type": "markdown",
"id": "59f82250-7903-4279-8300-062542c83416",
"metadata": {},
"source": [
"## Query vector store\n",
"\n",
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
"\n",
"### Filtering Support\n",
"\n",
"The vectorstore supports a set of filters that can be applied against the metadata fields of the documents.\n",
"\n",
"| Operator | Meaning/Category |\n",
"|----------|-------------------------|\n",
"| \\$eq | Equality (==) |\n",
"| \\$ne | Inequality (!=) |\n",
"| \\$lt | Less than (&lt;) |\n",
"| \\$lte | Less than or equal (&lt;=) |\n",
"| \\$gt | Greater than (>) |\n",
"| \\$gte | Greater than or equal (>=) |\n",
"| \\$in | Special Cased (in) |\n",
"| \\$nin | Special Cased (not in) |\n",
"| \\$between | Special Cased (between) |\n",
"| \\$like | Text (like) |\n",
"| \\$ilike | Text (case-insensitive like) |\n",
"| \\$and | Logical (and) |\n",
"| \\$or | Logical (or) |\n",
"\n",
"### Query directly\n",
"\n",
"Performing a simple similarity search can be done as follows:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f15a2359-6dc3-4099-8214-785f167a9ca4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"results = vector_store.similarity_search(\n",
" \"kitty\", k=10, filter={\"id\": {\"$in\": [\"1\", \"5\", \"2\", \"9\"]}}\n",
")\n",
"for doc in results:\n",
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
]
},
{
"cell_type": "markdown",
"id": "d92ea049-1b1f-4ae9-9525-35750fe2e52e",
"metadata": {},
"source": [
"If you provide a dict with multiple fields, but no operators, the top level will be interpreted as a logical **AND** filter"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88f919e4-e4b0-4b5f-99b3-24c675c26d33",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"vector_store.similarity_search(\n",
" \"ducks\",\n",
" k=10,\n",
" filter={\n",
" \"id\": {\"$in\": [\"1\", \"5\", \"2\", \"9\"]},\n",
" \"location\": {\"$in\": [\"pond\", \"market\"]},\n",
" },\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88f423a4-6575-4fb8-9be2-a3da01106591",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"vector_store.similarity_search(\n",
" \"ducks\",\n",
" k=10,\n",
" filter={\n",
" \"$and\": [\n",
" {\"id\": {\"$in\": [\"1\", \"5\", \"2\", \"9\"]}},\n",
" {\"location\": {\"$in\": [\"pond\", \"market\"]}},\n",
" ]\n",
" },\n",
")"
]
},
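To illustrate a couple more of the operators from the table above, here is a small sketch combining `$or` with `$like`; the query text and patterns are illustrative and reuse the documents added earlier:

```python
# Illustrative sketch: match documents whose topic equals "food"
# OR whose location contains the substring "library".
vector_store.similarity_search(
    "something to eat or read",
    k=10,
    filter={
        "$or": [
            {"topic": {"$eq": "food"}},
            {"location": {"$like": "%library%"}},
        ]
    },
)
```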
{
"cell_type": "markdown",
"id": "2e65adc1",
"metadata": {},
"source": [
"If you want to execute a similarity search and receive the corresponding scores you can run:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d92e7b3",
"metadata": {},
"outputs": [],
"source": [
"results = vector_store.similarity_search_with_score(query=\"cats\", k=1)\n",
"for doc, score in results:\n",
" print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
]
},
{
"cell_type": "markdown",
"id": "8d40db8c",
"metadata": {},
"source": [
"### Query by turning into retriever\n",
"\n",
"You can also transform the vector store into a retriever for easier usage in your chains. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7cd1fb75",
"metadata": {},
"outputs": [],
"source": [
"retriever = vector_store.as_retriever(search_kwargs={\"k\": 1})\n",
"retriever.invoke(\"kitty\")"
]
},
{
"cell_type": "markdown",
"id": "7ecd77a0",
"metadata": {},
"source": [
"## Usage for retrieval-augmented generation\n",
"\n",
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
"\n",
"- [Tutorials](/docs/tutorials/)\n",
"- [How-to: Question and answer with RAG](https://python.langchain.com/docs/how_to/#qa-with-rag)\n",
"- [Retrieval conceptual docs](https://python.langchain.com/docs/concepts/retrieval)"
]
},
{
"cell_type": "markdown",
"id": "33a5f0e6",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all GelVectorStore features and configurations head to the API reference: https://python.langchain.com/api_reference/"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -553,7 +553,10 @@
"cell_type": "markdown",
"id": "8edb47106e1a46a883d545849b8ab81b",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"\n",
@@ -576,6 +579,9 @@
"id": "10185d26023b46108eb7d9f57d49d2b3",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
@@ -603,7 +609,10 @@
"cell_type": "markdown",
"id": "8763a12b2bbd4a93a75aff182afb95dc",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"> - When you use `BM25BuiltInFunction`, please note that the full-text search is available in Milvus Standalone and Milvus Distributed, but not in Milvus Lite, although it is on the roadmap for future inclusion. It will also be available in Zilliz Cloud (fully-managed Milvus) soon. Please reach out to support@zilliz.com for more information.\n",
@@ -617,7 +626,10 @@
"cell_type": "markdown",
"id": "7623eae2785240b9bd12b16a66d81610",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Rerank the candidates\n",
@@ -632,6 +644,9 @@
"id": "7cdc8c89c7104fffa095e18ddfef8986",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
@@ -645,14 +660,6 @@
")"
]
},
{
"cell_type": "markdown",
"id": "b3965036",
"metadata": {},
"source": [
"For more information about Full-text search and Hybrid search, please refer to the [Using Full-Text Search with LangChain and Milvus](https://milvus.io/docs/full_text_search_with_langchain.md) and [Hybrid Retrieval with LangChain and Milvus](https://milvus.io/docs/milvus_hybrid_search_retriever.md)."
]
},
{
"cell_type": "markdown",
"id": "8ac953f1",
@@ -813,7 +820,7 @@
"provenance": []
},
"kernelspec": {
"display_name": ".venv",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -827,7 +834,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.13.2"
}
},
"nbformat": 4,

View File

@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
"pip install -qU langchain-pinecone pinecone-notebooks"
"pip install -qU langchain langchain-pinecone langchain-openai"
]
},
{
@@ -49,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "eb554814",
"metadata": {},
"outputs": [],
@@ -57,7 +57,7 @@
"import getpass\n",
"import os\n",
"\n",
"from pinecone import Pinecone, ServerlessSpec\n",
"from pinecone import Pinecone\n",
"\n",
"if not os.getenv(\"PINECONE_API_KEY\"):\n",
" os.environ[\"PINECONE_API_KEY\"] = getpass.getpass(\"Enter your Pinecone API key: \")\n",
@@ -98,59 +98,41 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 4,
"id": "276a06dd",
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"from pinecone import ServerlessSpec\n",
"\n",
"index_name = \"langchain-test-index\" # change if desired\n",
"\n",
"existing_indexes = [index_info[\"name\"] for index_info in pc.list_indexes()]\n",
"\n",
"if index_name not in existing_indexes:\n",
"if not pc.has_index(index_name):\n",
" pc.create_index(\n",
" name=index_name,\n",
" dimension=3072,\n",
" dimension=1536,\n",
" metric=\"cosine\",\n",
" spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
" )\n",
" while not pc.describe_index(index_name).status[\"ready\"]:\n",
" time.sleep(1)\n",
"\n",
"index = pc.Index(index_name)"
]
},
{
"cell_type": "markdown",
"id": "3a4d377f",
"metadata": {},
"source": [
"Now that our Pinecone index is setup, we can initialize our vector store. \n",
"\n",
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
"\n",
"<EmbeddingTabs/>\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 5,
"id": "1485db56",
"metadata": {},
"outputs": [],
"source": [
"# | output: false\n",
"# | echo: false\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
]
},
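The cell that constructs the vector store is unchanged and not reproduced in this hunk; a minimal sketch of the usual pattern, assuming the `index` and `embeddings` objects created above:

```python
# Illustrative sketch: wrap the existing Pinecone index in a LangChain vector store.
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)
```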
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 6,
"id": "6e104aee",
"metadata": {},
"outputs": [],
@@ -176,30 +158,10 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"id": "70e688f4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['167b8681-5974-467f-adcb-6e987a18df01',\n",
" 'd16010fd-41f8-4d49-9c22-c66d5555a3fe',\n",
" 'ffcacfb3-2bc2-44c3-a039-c2256a905c0e',\n",
" 'cf3bfc9f-5dc7-4f5e-bb41-edb957394126',\n",
" 'e99b07eb-fdff-4cb9-baa8-619fd8efeed3',\n",
" '68c93033-a24f-40bd-8492-92fa26b631a4',\n",
" 'b27a4ecb-b505-4c5d-89ff-526e3d103558',\n",
" '4868a9e6-e6fb-4079-b400-4a1dfbf0d4c4',\n",
" '921c0e9c-0550-4eb5-9a6c-ed44410788b2',\n",
" 'c446fc23-64e8-47e7-8c19-ecf985e9411e']"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"from uuid import uuid4\n",
"\n",
@@ -268,7 +230,6 @@
" document_10,\n",
"]\n",
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
"\n",
"vector_store.add_documents(documents=documents, ids=uuids)"
]
},
@@ -282,7 +243,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 8,
"id": "5b8437cd",
"metadata": {},
"outputs": [],
@@ -306,19 +267,10 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 9,
"id": "ffbcb3fb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
]
}
],
"outputs": [],
"source": [
"results = vector_store.similarity_search(\n",
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
@@ -341,18 +293,10 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"id": "5fb24583",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* [SIM=0.553187] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
]
}
],
"outputs": [],
"source": [
"results = vector_store.similarity_search_with_score(\n",
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
@@ -377,25 +321,14 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"id": "78140e87",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"retriever = vector_store.as_retriever(\n",
" search_type=\"similarity_score_threshold\",\n",
" search_kwargs={\"k\": 1, \"score_threshold\": 0.5},\n",
" search_kwargs={\"k\": 1, \"score_threshold\": 0.4},\n",
")\n",
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
]
@@ -421,13 +354,13 @@
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all __ModuleName__VectorStore features and configurations head to the API reference: https://python.langchain.com/api_reference/pinecone/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html"
"For detailed documentation of all features and configurations head to the API reference: https://python.langchain.com/api_reference/pinecone/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -441,7 +374,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.10.15"
}
},
"nbformat": 4,

View File

@@ -234,6 +234,8 @@
"_ = vector_store.add_documents(documents=all_splits)\n",
"\n",
"# Define prompt for question-answering\n",
"# N.B. for non-US LangSmith endpoints, you may need to specify\n",
"# api_url=\"https://api.smith.langchain.com\" in hub.pull.\n",
"prompt = hub.pull(\"rlm/rag-prompt\")\n",
"\n",
"\n",
@@ -535,7 +537,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "46f378c5-858c-488f-8aef-8b59a6280791",
"metadata": {},
"outputs": [
@@ -553,6 +555,8 @@
"source": [
"from langchain import hub\n",
"\n",
"# N.B. for non-US LangSmith endpoints, you may need to specify\n",
"# api_url=\"https://api.smith.langchain.com\" in hub.pull.\n",
"prompt = hub.pull(\"rlm/rag-prompt\")\n",
"\n",
"example_messages = prompt.invoke(\n",

View File

@@ -139,7 +139,7 @@ const config = {
},
announcementBar: {
content:
'<strong>Join us at <a href="https://interrupt.langchain.com/" target="_blank" rel="noopener noreferrer"> Interrupt: The Agent AI Conference by LangChain</a> on May 13 & 14 in San Francisco!</strong>',
'<strong>We are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith. <a href="https://www.langchain.com/careers" target="_blank" rel="noopener noreferrer"> Join our team!</a></strong>',
backgroundColor: '#d0c9fe'
},
prism: {

View File

@@ -37,6 +37,7 @@ def _reorder_keys(p):
"downloads",
"downloads_updated_at",
"disabled",
"include_in_api_ref",
]
if set(keys) - set(key_order):
raise ValueError(f"Unexpected keys: {set(keys) - set(key_order)}")

View File

@@ -9,8 +9,10 @@ export default function EmbeddingTabs(props) {
hideOpenai,
azureOpenaiParams,
hideAzureOpenai,
googleParams,
hideGoogle,
googleGenAIParams,
hideGoogleGenAI,
googleVertexAIParams,
hideGoogleVertexAI,
awsParams,
hideAws,
huggingFaceParams,
@@ -38,7 +40,8 @@ export default function EmbeddingTabs(props) {
const azureParamsOrDefault =
azureOpenaiParams ??
`\n azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],\n azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],\n openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],\n`;
const googleParamsOrDefault = googleParams ?? `model="text-embedding-004"`;
const googleGenAIParamsOrDefault = googleGenAIParams ?? `model="models/embedding-001"`;
const googleVertexAIParamsOrDefault = googleVertexAIParams ?? `model="text-embedding-004"`;
const awsParamsOrDefault = awsParams ?? `model_id="amazon.titan-embed-text-v2:0"`;
const huggingFaceParamsOrDefault = huggingFaceParams ?? `model_name="sentence-transformers/all-mpnet-base-v2"`;
const ollamaParamsOrDefault = ollamaParams ?? `model="llama3"`;
@@ -73,13 +76,22 @@ export default function EmbeddingTabs(props) {
shouldHide: hideAzureOpenai,
},
{
value: "Google",
label: "Google",
text: `from langchain_google_vertexai import VertexAIEmbeddings\n\n${embeddingVarName} = VertexAIEmbeddings(${googleParamsOrDefault})`,
value: "GoogleGenAI",
label: "Google Gemini",
text: `from langchain_google_genai import GoogleGenerativeAIEmbeddings\n\n${embeddingVarName} = GoogleGenerativeAIEmbeddings(${googleGenAIParamsOrDefault})`,
apiKeyName: "GOOGLE_API_KEY",
packageName: "langchain-google-genai",
default: false,
shouldHide: hideGoogleGenAI,
},
{
value: "GoogleVertexAI",
label: "Google Vertex",
text: `from langchain_google_vertexai import VertexAIEmbeddings\n\n${embeddingVarName} = VertexAIEmbeddings(${googleVertexAIParamsOrDefault})`,
apiKeyName: undefined,
packageName: "langchain-google-vertexai",
default: false,
shouldHide: hideGoogle,
shouldHide: hideGoogleVertexAI,
},
{
value: "AWS",

View File

@@ -461,14 +461,6 @@ const FEATURE_TABLES = {
apiLink: "https://python.langchain.com/api_reference/elasticsearch/retrievers/langchain_elasticsearch.retrievers.ElasticsearchRetriever.html",
package: "langchain_elasticsearch"
},
{
name: "MilvusCollectionHybridSearchRetriever",
link: "milvus_hybrid_search",
selfHost: true,
cloudOffering: false,
apiLink: "https://python.langchain.com/api_reference/milvus/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html",
package: "langchain_milvus"
},
{
name: "VertexAISearchRetriever",
link: "google_vertex_ai_search",

View File

@@ -146,6 +146,10 @@
"source": "/docs/integrations/retrievers/singlestoredb(/?)",
"destination": "https://python.langchain.com/v0.2/docs/integrations/retrievers/singlestoredb/"
},
{
"source": "/docs/integrations/providers/dspy(/?)",
"destination": "https://python.langchain.com/v0.2/docs/integrations/providers/dspy/"
},
{
"source": "/api_reference/mongodb/:path(.*/?)*",
"destination": "https://langchain-mongodb.readthedocs.io/en/latest/langchain_mongodb/api_docs.html"
@@ -153,6 +157,10 @@
{
"source": "/api_reference/tests/:path(.*/?)*",
"destination": "/api_reference/standard_tests/:path"
},
{
"source": "/docs/integrations/retrievers/milvus_hybrid_search(/?)",
"destination": "https://python.langchain.com/v0.2/docs/integrations/retrievers/milvus_hybrid_search/"
}
]
}

View File

@@ -2,3 +2,5 @@ httpx
grpcio
aiohttp<3.11
protobuf<3.21
tenacity
urllib3

View File

@@ -50,7 +50,7 @@ def migrate(
"reflect any imports from new packages. For example, if you see new "
"imports from langchain_openai, langchain_anthropic or "
"langchain_text_splitters you "
"should them to your dependencies! \n\n"
"should add them to your dependencies! \n\n"
'⚠️ This script is a "best-effort", and is likely to make some '
"mistakes.\n\n"
"🛡️ Backup your code prior to running the migration script -- it will "

View File

@@ -30,15 +30,15 @@ if TYPE_CHECKING:
from .path import as_import_path, get_relative_path
__all__ = (
"LangChainBetaWarning",
"LangChainDeprecationWarning",
"as_import_path",
"beta",
"deprecated",
"get_relative_path",
"LangChainBetaWarning",
"LangChainDeprecationWarning",
"suppress_langchain_beta_warning",
"surface_langchain_beta_warnings",
"suppress_langchain_deprecation_warning",
"surface_langchain_beta_warnings",
"surface_langchain_deprecation_warnings",
"warn_deprecated",
)

View File

@@ -54,39 +54,39 @@ if TYPE_CHECKING:
)
__all__ = (
"dispatch_custom_event",
"adispatch_custom_event",
"RetrieverManagerMixin",
"LLMManagerMixin",
"ChainManagerMixin",
"ToolManagerMixin",
"Callbacks",
"CallbackManagerMixin",
"RunManagerMixin",
"BaseCallbackHandler",
"AsyncCallbackHandler",
"BaseCallbackManager",
"BaseRunManager",
"RunManager",
"ParentRunManager",
"AsyncRunManager",
"AsyncParentRunManager",
"CallbackManagerForLLMRun",
"AsyncCallbackManagerForLLMRun",
"CallbackManagerForChainRun",
"AsyncCallbackManagerForChainRun",
"CallbackManagerForToolRun",
"AsyncCallbackManagerForToolRun",
"CallbackManagerForRetrieverRun",
"AsyncCallbackManagerForRetrieverRun",
"CallbackManager",
"CallbackManagerForChainGroup",
"AsyncCallbackManager",
"AsyncCallbackManagerForChainGroup",
"AsyncCallbackManagerForChainRun",
"AsyncCallbackManagerForLLMRun",
"AsyncCallbackManagerForRetrieverRun",
"AsyncCallbackManagerForToolRun",
"AsyncParentRunManager",
"AsyncRunManager",
"BaseCallbackHandler",
"BaseCallbackManager",
"BaseRunManager",
"CallbackManager",
"CallbackManagerForChainGroup",
"CallbackManagerForChainRun",
"CallbackManagerForLLMRun",
"CallbackManagerForRetrieverRun",
"CallbackManagerForToolRun",
"CallbackManagerMixin",
"Callbacks",
"ChainManagerMixin",
"FileCallbackHandler",
"LLMManagerMixin",
"ParentRunManager",
"RetrieverManagerMixin",
"RunManager",
"RunManagerMixin",
"StdOutCallbackHandler",
"StreamingStdOutCallbackHandler",
"FileCallbackHandler",
"ToolManagerMixin",
"UsageMetadataCallbackHandler",
"adispatch_custom_event",
"dispatch_custom_event",
"get_usage_metadata_callback",
)

View File

@@ -520,6 +520,8 @@ class RunManager(BaseRunManager):
Returns:
Any: The result of the callback.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_text",
@@ -542,6 +544,8 @@ class RunManager(BaseRunManager):
retry_state (RetryCallState): The retry state.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_retry",
@@ -601,6 +605,8 @@ class AsyncRunManager(BaseRunManager, ABC):
Returns:
Any: The result of the callback.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_text",
@@ -623,6 +629,8 @@ class AsyncRunManager(BaseRunManager, ABC):
retry_state (RetryCallState): The retry state.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_retry",
@@ -675,6 +683,8 @@ class CallbackManagerForLLMRun(RunManager, LLMManagerMixin):
The chunk. Defaults to None.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_llm_new_token",
@@ -694,6 +704,8 @@ class CallbackManagerForLLMRun(RunManager, LLMManagerMixin):
response (LLMResult): The LLM result.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_llm_end",
@@ -718,6 +730,8 @@ class CallbackManagerForLLMRun(RunManager, LLMManagerMixin):
- response (LLMResult): The response which was generated before
the error occurred.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_llm_error",
@@ -750,7 +764,6 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
inheritable_metadata=self.inheritable_metadata,
)
@shielded
async def on_llm_new_token(
self,
token: str,
@@ -766,6 +779,8 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
The chunk. Defaults to None.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_llm_new_token",
@@ -786,6 +801,8 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
response (LLMResult): The LLM result.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_llm_end",
@@ -814,6 +831,8 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_llm_error",
@@ -836,6 +855,8 @@ class CallbackManagerForChainRun(ParentRunManager, ChainManagerMixin):
outputs (Union[dict[str, Any], Any]): The outputs of the chain.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_chain_end",
@@ -858,6 +879,8 @@ class CallbackManagerForChainRun(ParentRunManager, ChainManagerMixin):
error (Exception or KeyboardInterrupt): The error.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_chain_error",
@@ -879,6 +902,8 @@ class CallbackManagerForChainRun(ParentRunManager, ChainManagerMixin):
Returns:
Any: The result of the callback.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_agent_action",
@@ -900,6 +925,8 @@ class CallbackManagerForChainRun(ParentRunManager, ChainManagerMixin):
Returns:
Any: The result of the callback.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_agent_finish",
@@ -942,6 +969,8 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
outputs (Union[dict[str, Any], Any]): The outputs of the chain.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_chain_end",
@@ -965,6 +994,8 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
error (Exception or KeyboardInterrupt): The error.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_chain_error",
@@ -976,7 +1007,6 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
**kwargs,
)
@shielded
async def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
"""Run when agent action is received.
@@ -987,6 +1017,8 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
Returns:
Any: The result of the callback.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_agent_action",
@@ -998,7 +1030,6 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
**kwargs,
)
@shielded
async def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
"""Run when agent finish is received.
@@ -1009,6 +1040,8 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
Returns:
Any: The result of the callback.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_agent_finish",
@@ -1035,6 +1068,8 @@ class CallbackManagerForToolRun(ParentRunManager, ToolManagerMixin):
output (Any): The output of the tool.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_tool_end",
@@ -1057,6 +1092,8 @@ class CallbackManagerForToolRun(ParentRunManager, ToolManagerMixin):
error (Exception or KeyboardInterrupt): The error.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_tool_error",
@@ -1089,7 +1126,6 @@ class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin):
inheritable_metadata=self.inheritable_metadata,
)
@shielded
async def on_tool_end(self, output: Any, **kwargs: Any) -> None:
"""Async run when the tool ends running.
@@ -1097,6 +1133,8 @@ class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin):
output (Any): The output of the tool.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_tool_end",
@@ -1108,7 +1146,6 @@ class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin):
**kwargs,
)
@shielded
async def on_tool_error(
self,
error: BaseException,
@@ -1120,6 +1157,8 @@ class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin):
error (Exception or KeyboardInterrupt): The error.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_tool_error",
@@ -1146,6 +1185,8 @@ class CallbackManagerForRetrieverRun(ParentRunManager, RetrieverManagerMixin):
documents (Sequence[Document]): The retrieved documents.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_retriever_end",
@@ -1168,6 +1209,8 @@ class CallbackManagerForRetrieverRun(ParentRunManager, RetrieverManagerMixin):
error (BaseException): The error.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
handle_event(
self.handlers,
"on_retriever_error",
@@ -1213,6 +1256,8 @@ class AsyncCallbackManagerForRetrieverRun(
documents (Sequence[Document]): The retrieved documents.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_retriever_end",
@@ -1236,6 +1281,8 @@ class AsyncCallbackManagerForRetrieverRun(
error (BaseException): The error.
**kwargs (Any): Additional keyword arguments.
"""
if not self.handlers:
return
await ahandle_event(
self.handlers,
"on_retriever_error",
@@ -1521,6 +1568,8 @@ class CallbackManager(BaseCallbackManager):
.. versionadded:: 0.2.14
"""
if not self.handlers:
return
if kwargs:
msg = (
"The dispatcher API does not accept additional keyword arguments."
@@ -1998,6 +2047,8 @@ class AsyncCallbackManager(BaseCallbackManager):
.. versionadded:: 0.2.14
"""
if not self.handlers:
return
if run_id is None:
run_id = uuid.uuid4()

View File

@@ -14,8 +14,8 @@ __all__ = (
"BaseLoader",
"Blob",
"BlobLoader",
"PathLike",
"LangSmithLoader",
"PathLike",
)
_dynamic_imports = {

View File

@@ -50,7 +50,7 @@ class LangSmithLoader(BaseLoader):
offset: int = 0,
limit: Optional[int] = None,
metadata: Optional[dict] = None,
filter: Optional[str] = None,
filter: Optional[str] = None, # noqa: A002
content_key: str = "",
format_content: Optional[Callable[..., str]] = None,
client: Optional[LangSmithClient] = None,

View File

@@ -14,7 +14,7 @@ if TYPE_CHECKING:
from .compressor import BaseDocumentCompressor
from .transformers import BaseDocumentTransformer
__all__ = ("Document", "BaseDocumentTransformer", "BaseDocumentCompressor")
__all__ = ("BaseDocumentCompressor", "BaseDocumentTransformer", "Document")
_dynamic_imports = {
"Document": "base",

Some files were not shown because too many files have changed in this diff.