Mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-12 20:20:08 +00:00)
Compare commits
120 Commits
| SHA1 |
|---|
| 092697de60 |
| 71c074d28f |
| 053a1246da |
| 1b5ffe4107 |
| f16456139b |
| cf1fa27e27 |
| beacedd6b3 |
| 53d6286539 |
| 7b45d46210 |
| 580fc7d464 |
| 6993bc9ad1 |
| dcb5aba999 |
| f29659728c |
| 916768e3c1 |
| ff12555bdc |
| 0c6137ec2b |
| bf645c83f4 |
| 49fbcec34f |
| 32fcc97a90 |
| 8b6fec89bc |
| ea1f9e2d5b |
| 17c5a1621f |
| e1af509966 |
| eb25d7472d |
| c982573f1e |
| 671e4fd114 |
| bd367ba10c |
| 1f43b6062e |
| c178ad87b6 |
| 12b063eb67 |
| a401d7e52a |
| 9efafe3337 |
| 03adca6c44 |
| 6bbc12b7f7 |
| aa4890c136 |
| a8f2ddee31 |
| 6cd1aadf60 |
| eab8484a80 |
| 672339f3c6 |
| 6f2acbcf2e |
| 8b145d5dc3 |
| d4f77a8c8f |
| 71b71768bf |
| 921573e2b7 |
| d8a7eda12e |
| 8af0dc5fd6 |
| 7263011b24 |
| 1523602196 |
| 367566b02f |
| 29bfbc0ea6 |
| b8ae2de169 |
| 263c215112 |
| 17b799860f |
| 0b8837a0cc |
| 4f41b54bcb |
| ce0b1a9428 |
| 275e3b6710 |
| e53c10e546 |
| 395f057243 |
| a9ee625f32 |
| 544648eb71 |
| 40be8d1d90 |
| f034bd7933 |
| 17a04dd598 |
| a44e707811 |
| 3520520a48 |
| 09d74504e3 |
| b2f0fbfea5 |
| 636a35fc2d |
| 7b9feb60cc |
| 87add0809f |
| 868cfc4a8f |
| 83d006190d |
| 1e56c66f86 |
| 92af7b0933 |
| e6147ce5d2 |
| 0d59fe9789 |
| ff9183fd3c |
| 65fbbb0249 |
| 77d3f04e0a |
| 0dee089ba7 |
| 2ec74fea44 |
| 683da2c9e9 |
| 0ef4ac75b7 |
| 23ec06b481 |
| e9e597be8e |
| 0ba8697286 |
| 9aac8923a3 |
| efc52e18e9 |
| 2d202f9762 |
| d4555ac924 |
| e34f9fd6f7 |
| 6c3901f9f9 |
| 682f338c17 |
| d7e016c5fc |
| 4b11cbeb47 |
| b5b90b5929 |
| f70b263ff3 |
| bb69d4c42e |
| 1df3ee91e7 |
| 19041dcc95 |
| 3cba22d8d7 |
| 66d1ed6099 |
| a15034d8d1 |
| 57c81dc3e3 |
| 52732a4d13 |
| 5dde64583e |
| 6b6750967a |
| 703fce7972 |
| 50fa524a6d |
| c0b69808a8 |
| fce8caca16 |
| b8d0403671 |
| 1204fb8010 |
| 1e00116ae7 |
| 3e25a93136 |
| 325f729a92 |
| 00ac49dd3e |
| 6268ae8db0 |
| 77ecf47f6d |
.github/dependabot.yml (vendored, new file: 11 lines added)

```diff
@@ -0,0 +1,11 @@
+# Please see the documentation for all configuration options:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+# and
+# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
```
.github/scripts/check_diff.py (vendored, 1 line changed)

```diff
@@ -37,7 +37,6 @@ IGNORED_PARTNERS = [
 ]
 
 PY_312_MAX_PACKAGES = [
-    "libs/partners/voyageai",
    "libs/partners/chroma",  # https://github.com/chroma-core/chroma/issues/4382
 ]
 
 
```
.github/workflows/_integration_test.yml (vendored, 1 line changed)

```diff
@@ -67,7 +67,6 @@ jobs:
           ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
           ES_API_KEY: ${{ secrets.ES_API_KEY }}
           MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
-          VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
           COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
           UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
           XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
```
.github/workflows/_release.yml (vendored, 3 lines changed)

```diff
@@ -322,7 +322,6 @@ jobs:
           ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
           ES_API_KEY: ${{ secrets.ES_API_KEY }}
           MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
-          VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
           UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
           FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
           XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
@@ -341,7 +340,7 @@
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        partner: [openai, anthropic]
+        partner: [anthropic]
       fail-fast: false  # Continue testing other partners if one fails
     env:
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
```
.github/workflows/check-broken-links.yml (vendored, 2 lines changed)

```diff
@@ -12,7 +12,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Use Node.js 18.x
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
         with:
           node-version: 18.x
           cache: "yarn"
```
.github/workflows/check_diffs.yml (vendored, 2 lines changed)

```diff
@@ -29,7 +29,7 @@ jobs:
         with:
           python-version: '3.11'
       - id: files
-        uses: Ana06/get-changed-files@v2.2.0
+        uses: Ana06/get-changed-files@v2.3.0
       - id: set-matrix
         run: |
           python -m pip install packaging requests
```
.github/workflows/check_new_docs.yml (vendored, 2 lines changed)

```diff
@@ -24,7 +24,7 @@ jobs:
         with:
           python-version: '3.10'
       - id: files
-        uses: Ana06/get-changed-files@v2.2.0
+        uses: Ana06/get-changed-files@v2.3.0
         with:
           filter: |
             *.ipynb
```
||||
4
.github/workflows/codspeed.yml
vendored
4
.github/workflows/codspeed.yml
vendored
@@ -21,12 +21,12 @@ jobs:
|
||||
|
||||
# We have to use 3.12, 3.13 is not yet supported
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
python-version: "3.12"
|
||||
|
||||
# Using this action is still necessary for CodSpeed to work
|
||||
- uses: actions/setup-python@v3
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
|
||||
|
||||
.pre-commit-config.yaml (6 lines removed; the file header was not captured in this view, but the `repos`/hook keys identify the pre-commit config)

```diff
@@ -103,12 +103,6 @@ repos:
       entry: make -C libs/partners/qdrant format
       files: ^libs/partners/qdrant/
       pass_filenames: false
-    - id: voyageai
-      name: format partners/voyageai
-      language: system
-      entry: make -C libs/partners/voyageai format
-      files: ^libs/partners/voyageai/
-      pass_filenames: false
     - id: root
       name: format docs, cookbook
       language: system
```
Makefile (2 lines changed)

```diff
@@ -48,7 +48,7 @@ api_docs_quick_preview:
 api_docs_clean:
 	find ./docs/api_reference -name '*_api_reference.rst' -delete
 	git clean -fdX ./docs/api_reference
-	rm docs/api_reference/index.md
+	rm -f docs/api_reference/index.md
 
 
 ## api_docs_linkcheck: Run linkchecker on the API Reference documentation.
```

(`rm -f` succeeds even when `index.md` is already absent, so the clean target no longer aborts on a fresh checkout.)
Jupyter notebook (Q&A agent over two vector stores; file name not shown in this view):

```diff
@@ -22,7 +22,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 1,
+   "id": "e8d63d14-138d-4aa5-a741-7fd3537d00aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
    "id": "2e87c10a",
    "metadata": {},
    "outputs": [],
@@ -37,7 +49,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 3,
    "id": "0b7b772b",
    "metadata": {},
    "outputs": [],
@@ -54,19 +66,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 4,
    "id": "f2675861",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Running Chroma using direct local API.\n",
-      "Using DuckDB in-memory for database. Data will be transient.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from langchain_community.document_loaders import TextLoader\n",
     "\n",
@@ -81,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "id": "bc5403d4",
    "metadata": {},
    "outputs": [],
@@ -93,17 +96,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "1431cded",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
+     ]
+    }
+   ],
    "source": [
     "from langchain_community.document_loaders import WebBaseLoader"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "915d3ff3",
    "metadata": {},
    "outputs": [],
@@ -113,16 +124,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "96a2edf8",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Running Chroma using direct local API.\n",
-      "Using DuckDB in-memory for database. Data will be transient.\n"
+      "Created a chunk of size 2122, which is longer than the specified 1000\n",
+      "Created a chunk of size 3187, which is longer than the specified 1000\n",
+      "Created a chunk of size 1017, which is longer than the specified 1000\n",
+      "Created a chunk of size 1049, which is longer than the specified 1000\n",
+      "Created a chunk of size 1256, which is longer than the specified 1000\n",
+      "Created a chunk of size 2321, which is longer than the specified 1000\n"
      ]
     }
    ],
@@ -135,14 +150,6 @@
     ")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "71ecef90",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "id": "c0a6c031",
@@ -153,31 +160,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 9,
    "id": "eb142786",
    "metadata": {},
    "outputs": [],
    "source": [
     "# Import things that are needed generically\n",
-    "from langchain.agents import AgentType, Tool, initialize_agent\n",
-    "from langchain_openai import OpenAI"
+    "from langchain.agents import Tool"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 10,
    "id": "850bc4e9",
    "metadata": {},
    "outputs": [],
    "source": [
     "tools = [\n",
     "    Tool(\n",
-    "        name=\"State of Union QA System\",\n",
+    "        name=\"state_of_union_qa_system\",\n",
     "        func=state_of_union.run,\n",
     "        description=\"useful for when you need to answer questions about the most recent state of the union address. Input should be a fully formed question.\",\n",
     "    ),\n",
     "    Tool(\n",
-    "        name=\"Ruff QA System\",\n",
+    "        name=\"ruff_qa_system\",\n",
     "        func=ruff.run,\n",
     "        description=\"useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question.\",\n",
     "    ),\n",
@@ -186,94 +192,116 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
-   "id": "fc47f230",
+   "execution_count": 11,
+   "id": "70c461d8-aaca-4f2a-9a93-bf35841cc615",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Construct the agent. We will use the default agent type here.\n",
-    "# See documentation for a full list of options.\n",
-    "agent = initialize_agent(\n",
-    "    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
-    ")"
+    "from langgraph.prebuilt import create_react_agent\n",
+    "\n",
+    "agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
-   "id": "10ca2db8",
+   "execution_count": 12,
+   "id": "a6d2b911-3044-4430-a35b-75832bb45334",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "What did biden say about ketanji brown jackson in the state of the union address?\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "Tool Calls:\n",
+      "  state_of_union_qa_system (call_26QlRdsptjEJJZjFsAUjEbaH)\n",
+      " Call ID: call_26QlRdsptjEJJZjFsAUjEbaH\n",
+      "  Args:\n",
+      "    __arg1: What did Biden say about Ketanji Brown Jackson in the state of the union address?\n",
+      "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
+      "Name: state_of_union_qa_system\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-      "\u001b[32;1m\u001b[1;3m I need to find out what Biden said about Ketanji Brown Jackson in the State of the Union address.\n",
-      "Action: State of Union QA System\n",
-      "Action Input: What did Biden say about Ketanji Brown Jackson in the State of the Union address?\u001b[0m\n",
-      "Observation: \u001b[36;1m\u001b[1;3m Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n",
-      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
-      "Final Answer: Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n",
+      " Biden said that he nominated Ketanji Brown Jackson for the United States Supreme Court and praised her as one of the nation's top legal minds who will continue Justice Breyer's legacy of excellence.\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
       "\n",
-      "\u001b[1m> Finished chain.\u001b[0m\n"
+      "In the State of the Union address, Biden said that he nominated Ketanji Brown Jackson for the United States Supreme Court and praised her as one of the nation's top legal minds who will continue Justice Breyer's legacy of excellence.\n"
      ]
     },
-    {
-     "data": {
-      "text/plain": [
-       "\"Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
-      ]
-     },
-     "execution_count": 46,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
    ],
    "source": [
-    "agent.run(\n",
-    "    \"What did biden say about ketanji brown jackson in the state of the union address?\"\n",
-    ")"
+    "input_message = {\n",
+    "    \"role\": \"user\",\n",
+    "    \"content\": \"What did biden say about ketanji brown jackson in the state of the union address?\",\n",
+    "}\n",
+    "\n",
+    "for step in agent.stream(\n",
+    "    {\"messages\": [input_message]},\n",
+    "    stream_mode=\"values\",\n",
+    "):\n",
+    "    step[\"messages\"][-1].pretty_print()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
-   "id": "4e91b811",
+   "execution_count": 13,
+   "id": "e836b4cd-abf7-49eb-be0e-b9ad501213f3",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "Why use ruff over flake8?\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "Tool Calls:\n",
+      "  ruff_qa_system (call_KqDoWeO9bo9OAXdxOsCb6msC)\n",
+      " Call ID: call_KqDoWeO9bo9OAXdxOsCb6msC\n",
+      "  Args:\n",
+      "    __arg1: Why use ruff over flake8?\n",
+      "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
+      "Name: ruff_qa_system\n",
       "\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-      "\u001b[32;1m\u001b[1;3m I need to find out the advantages of using ruff over flake8\n",
-      "Action: Ruff QA System\n",
-      "Action Input: What are the advantages of using ruff over flake8?\u001b[0m\n",
-      "Observation: \u001b[33;1m\u001b[1;3m Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.\u001b[0m\n",
-      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
-      "Final Answer: Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.\u001b[0m\n",
+      "There are a few reasons why someone might choose to use Ruff over Flake8:\n",
       "\n",
-      "\u001b[1m> Finished chain.\u001b[0m\n"
+      "1. Larger rule set: Ruff implements over 800 rules, while Flake8 only implements around 200. This means that Ruff can catch more potential issues in your code.\n",
+      "\n",
+      "2. Better compatibility with other tools: Ruff is designed to work well with other tools like Black, isort, and type checkers like Mypy. This means that you can use Ruff alongside these tools to get more comprehensive feedback on your code.\n",
+      "\n",
+      "3. Automatic fixing of lint violations: Unlike Flake8, Ruff is capable of automatically fixing its own lint violations. This can save you time and effort when fixing issues in your code.\n",
+      "\n",
+      "4. Native implementation of popular Flake8 plugins: Ruff re-implements some of the most popular Flake8 plugins natively, which means you don't have to install and configure multiple plugins to get the same functionality.\n",
+      "\n",
+      "Overall, Ruff offers a more comprehensive and user-friendly experience compared to Flake8, making it a popular choice for many developers.\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "\n",
+      "You might choose to use Ruff over Flake8 for several reasons:\n",
+      "\n",
+      "1. Ruff has a much larger rule set, implementing over 800 rules compared to Flake8's roughly 200, so it can catch more potential issues.\n",
+      "2. Ruff is designed to work better with other tools like Black, isort, and type checkers like Mypy, providing more comprehensive code feedback.\n",
+      "3. Ruff can automatically fix its own lint violations, which Flake8 cannot, saving time and effort.\n",
+      "4. Ruff natively implements some popular Flake8 plugins, so you don't need to install and configure multiple plugins separately.\n",
+      "\n",
+      "Overall, Ruff offers a more comprehensive and user-friendly experience compared to Flake8.\n"
      ]
     },
-    {
-     "data": {
-      "text/plain": [
-       "'Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.'"
-      ]
-     },
-     "execution_count": 47,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
    ],
    "source": [
-    "agent.run(\"Why use ruff over flake8?\")"
+    "input_message = {\n",
+    "    \"role\": \"user\",\n",
+    "    \"content\": \"Why use ruff over flake8?\",\n",
+    "}\n",
+    "\n",
+    "for step in agent.stream(\n",
+    "    {\"messages\": [input_message]},\n",
+    "    stream_mode=\"values\",\n",
+    "):\n",
+    "    step[\"messages\"][-1].pretty_print()"
    ]
   },
   {
@@ -296,20 +324,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 14,
    "id": "f59b377e",
    "metadata": {},
    "outputs": [],
    "source": [
     "tools = [\n",
     "    Tool(\n",
-    "        name=\"State of Union QA System\",\n",
+    "        name=\"state_of_union_qa_system\",\n",
     "        func=state_of_union.run,\n",
     "        description=\"useful for when you need to answer questions about the most recent state of the union address. Input should be a fully formed question.\",\n",
     "        return_direct=True,\n",
     "    ),\n",
     "    Tool(\n",
-    "        name=\"Ruff QA System\",\n",
+    "        name=\"ruff_qa_system\",\n",
     "        func=ruff.run,\n",
     "        description=\"useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question.\",\n",
     "        return_direct=True,\n",
@@ -319,90 +347,92 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
-   "id": "8615707a",
+   "execution_count": 15,
+   "id": "06f69c0f-c83d-4b7f-a1c8-7614aced3bae",
    "metadata": {},
    "outputs": [],
    "source": [
-    "agent = initialize_agent(\n",
-    "    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
-    ")"
+    "from langgraph.prebuilt import create_react_agent\n",
+    "\n",
+    "agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
-   "id": "36e718a9",
+   "execution_count": 16,
+   "id": "a6b38c12-ac25-43c0-b9c2-2b1985ab4825",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "What did biden say about ketanji brown jackson in the state of the union address?\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "Tool Calls:\n",
+      "  state_of_union_qa_system (call_yjxh11OnZiauoyTAn9npWdxj)\n",
+      " Call ID: call_yjxh11OnZiauoyTAn9npWdxj\n",
+      "  Args:\n",
+      "    __arg1: What did Biden say about Ketanji Brown Jackson in the state of the union address?\n",
+      "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
+      "Name: state_of_union_qa_system\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-      "\u001b[32;1m\u001b[1;3m I need to find out what Biden said about Ketanji Brown Jackson in the State of the Union address.\n",
-      "Action: State of Union QA System\n",
-      "Action Input: What did Biden say about Ketanji Brown Jackson in the State of the Union address?\u001b[0m\n",
-      "Observation: \u001b[36;1m\u001b[1;3m Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n",
-      "\u001b[32;1m\u001b[1;3m\u001b[0m\n",
-      "\n",
-      "\u001b[1m> Finished chain.\u001b[0m\n"
+      " Biden said that he nominated Ketanji Brown Jackson for the United States Supreme Court and praised her as one of the nation's top legal minds who will continue Justice Breyer's legacy of excellence.\n"
      ]
     },
-    {
-     "data": {
-      "text/plain": [
-       "\" Biden said that Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
-      ]
-     },
-     "execution_count": 50,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
    ],
    "source": [
-    "agent.run(\n",
-    "    \"What did biden say about ketanji brown jackson in the state of the union address?\"\n",
-    ")"
+    "input_message = {\n",
+    "    \"role\": \"user\",\n",
+    "    \"content\": \"What did biden say about ketanji brown jackson in the state of the union address?\",\n",
+    "}\n",
+    "\n",
+    "for step in agent.stream(\n",
+    "    {\"messages\": [input_message]},\n",
+    "    stream_mode=\"values\",\n",
+    "):\n",
+    "    step[\"messages\"][-1].pretty_print()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
-   "id": "edfd0a1a",
+   "execution_count": 17,
+   "id": "88f08d86-7972-4148-8128-3ac8898ad68a",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "Why use ruff over flake8?\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "Tool Calls:\n",
+      "  ruff_qa_system (call_GiWWfwF6wbbRFQrHlHbhRtGW)\n",
+      " Call ID: call_GiWWfwF6wbbRFQrHlHbhRtGW\n",
+      "  Args:\n",
+      "    __arg1: What are the advantages of using ruff over flake8 for Python linting?\n",
+      "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
+      "Name: ruff_qa_system\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-      "\u001b[32;1m\u001b[1;3m I need to find out the advantages of using ruff over flake8\n",
-      "Action: Ruff QA System\n",
-      "Action Input: What are the advantages of using ruff over flake8?\u001b[0m\n",
-      "Observation: \u001b[33;1m\u001b[1;3m Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.\u001b[0m\n",
-      "\u001b[32;1m\u001b[1;3m\u001b[0m\n",
-      "\n",
-      "\u001b[1m> Finished chain.\u001b[0m\n"
+      " Ruff has a larger rule set, supports automatic fixing of lint violations, and does not require the installation of additional plugins. It also has better compatibility with Black and can be used alongside a type checker for more comprehensive code analysis.\n"
      ]
     },
-    {
-     "data": {
-      "text/plain": [
-       "' Ruff can be used as a drop-in replacement for Flake8 when used (1) without or with a small number of plugins, (2) alongside Black, and (3) on Python 3 code. It also re-implements some of the most popular Flake8 plugins and related code quality tools natively, including isort, yesqa, eradicate, and most of the rules implemented in pyupgrade. Ruff also supports automatically fixing its own lint violations, which Flake8 does not.'"
-      ]
-     },
-     "execution_count": 51,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
    ],
    "source": [
-    "agent.run(\"Why use ruff over flake8?\")"
+    "input_message = {\n",
+    "    \"role\": \"user\",\n",
+    "    \"content\": \"Why use ruff over flake8?\",\n",
+    "}\n",
+    "\n",
+    "for step in agent.stream(\n",
+    "    {\"messages\": [input_message]},\n",
+    "    stream_mode=\"values\",\n",
+    "):\n",
+    "    step[\"messages\"][-1].pretty_print()"
    ]
   },
   {
@@ -417,19 +447,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 18,
    "id": "d397a233",
    "metadata": {},
    "outputs": [],
    "source": [
     "tools = [\n",
     "    Tool(\n",
-    "        name=\"State of Union QA System\",\n",
+    "        name=\"state_of_union_qa_system\",\n",
     "        func=state_of_union.run,\n",
     "        description=\"useful for when you need to answer questions about the most recent state of the union address. Input should be a fully formed question, not referencing any obscure pronouns from the conversation before.\",\n",
     "    ),\n",
     "    Tool(\n",
-    "        name=\"Ruff QA System\",\n",
+    "        name=\"ruff_qa_system\",\n",
    "        func=ruff.run,\n",
     "        description=\"useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question, not referencing any obscure pronouns from the conversation before.\",\n",
     "    ),\n",
@@ -438,60 +468,60 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
-   "id": "06157240",
+   "execution_count": 19,
+   "id": "41743f29-150d-40ba-aa8e-3a63c32216aa",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Construct the agent. We will use the default agent type here.\n",
-    "# See documentation for a full list of options.\n",
-    "agent = initialize_agent(\n",
-    "    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
-    ")"
+    "from langgraph.prebuilt import create_react_agent\n",
+    "\n",
+    "agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
-   "id": "b492b520",
+   "execution_count": 20,
+   "id": "e20e81dd-284a-4d07-9160-63a84b65cba8",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "What tool does ruff use to run over Jupyter Notebooks? Did the president mention that tool in the state of the union?\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "Tool Calls:\n",
+      "  ruff_qa_system (call_VOnxiOEehauQyVOTjDJkR5L2)\n",
+      " Call ID: call_VOnxiOEehauQyVOTjDJkR5L2\n",
+      "  Args:\n",
+      "    __arg1: What tool does ruff use to run over Jupyter Notebooks?\n",
+      "  state_of_union_qa_system (call_AbSsXAxwe4JtCRhga926SxOZ)\n",
+      " Call ID: call_AbSsXAxwe4JtCRhga926SxOZ\n",
+      "  Args:\n",
+      "    __arg1: Did the president mention the tool that ruff uses to run over Jupyter Notebooks in the state of the union?\n",
+      "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
+      "Name: state_of_union_qa_system\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-      "\u001b[32;1m\u001b[1;3m I need to find out what tool ruff uses to run over Jupyter Notebooks, and if the president mentioned it in the state of the union.\n",
-      "Action: Ruff QA System\n",
-      "Action Input: What tool does ruff use to run over Jupyter Notebooks?\u001b[0m\n",
-      "Observation: \u001b[33;1m\u001b[1;3m Ruff is integrated into nbQA, a tool for running linters and code formatters over Jupyter Notebooks. After installing ruff and nbqa, you can run Ruff over a notebook like so: > nbqa ruff Untitled.html\u001b[0m\n",
-      "Thought:\u001b[32;1m\u001b[1;3m I now need to find out if the president mentioned this tool in the state of the union.\n",
-      "Action: State of Union QA System\n",
-      "Action Input: Did the president mention nbQA in the state of the union?\u001b[0m\n",
-      "Observation: \u001b[36;1m\u001b[1;3m No, the president did not mention nbQA in the state of the union.\u001b[0m\n",
-      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
-      "Final Answer: No, the president did not mention nbQA in the state of the union.\u001b[0m\n",
+      " No, the president did not mention the tool that ruff uses to run over Jupyter Notebooks in the state of the union.\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
       "\n",
-      "\u001b[1m> Finished chain.\u001b[0m\n"
+      "Ruff does not support source.organizeImports and source.fixAll code actions in Jupyter Notebooks. Additionally, the president did not mention the tool that ruff uses to run over Jupyter Notebooks in the state of the union.\n"
      ]
     },
-    {
-     "data": {
-      "text/plain": [
-       "'No, the president did not mention nbQA in the state of the union.'"
-      ]
-     },
-     "execution_count": 59,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
    ],
    "source": [
-    "agent.run(\n",
-    "    \"What tool does ruff use to run over Jupyter Notebooks? Did the president mention that tool in the state of the union?\"\n",
-    ")"
+    "input_message = {\n",
+    "    \"role\": \"user\",\n",
+    "    \"content\": \"What tool does ruff use to run over Jupyter Notebooks? Did the president mention that tool in the state of the union?\",\n",
+    "}\n",
+    "\n",
+    "for step in agent.stream(\n",
+    "    {\"messages\": [input_message]},\n",
+    "    stream_mode=\"values\",\n",
+    "):\n",
+    "    step[\"messages\"][-1].pretty_print()"
    ]
   },
   {
@@ -519,7 +549,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.1"
+   "version": "3.12.4"
   }
  },
 "nbformat": 4,
```
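Every agent cell in that notebook swaps the deprecated `initialize_agent`/`AgentType.ZERO_SHOT_REACT_DESCRIPTION` pattern for LangGraph's `create_react_agent`. A minimal, self-contained sketch of the new pattern, assembled from the diff's own added lines; the `fake_qa` body is a stand-in for the notebook's `state_of_union.run`/`ruff.run` retrieval chains, and `langgraph`/`langchain-openai` plus an `OPENAI_API_KEY` are assumed to be available:

```python
# Sketch of the migrated agent pattern shown in the hunks above.
from langchain.agents import Tool
from langgraph.prebuilt import create_react_agent


def fake_qa(question: str) -> str:
    # Stand-in for the notebook's RetrievalQA chains.
    return f"canned answer for: {question}"


tools = [
    Tool(
        # snake_case name: OpenAI's tool-name pattern disallows spaces, which is
        # presumably why "State of Union QA System" became "state_of_union_qa_system".
        name="state_of_union_qa_system",
        func=fake_qa,
        description="useful for when you need to answer questions about the most recent state of the union address.",
    )
]

agent = create_react_agent("openai:gpt-4.1-mini", tools)

input_message = {"role": "user", "content": "What did biden say about ketanji brown jackson?"}
for step in agent.stream({"messages": [input_message]}, stream_mode="values"):
    step["messages"][-1].pretty_print()  # streams each intermediate message
```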
Python script (file name not shown in this view):

```diff
@@ -663,6 +663,7 @@ def main(dirs: Optional[list] = None) -> None:
             dir_
             for dir_ in os.listdir(ROOT_DIR / "libs")
             if dir_ not in ("cli", "partners", "packages.yml")
+            and "pyproject.toml" in os.listdir(ROOT_DIR / "libs" / dir_)
         ]
         dirs += [
             dir_
```
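The added condition guards the directory scan so that only folders actually containing a package are picked up. A sketch of the comprehension in isolation; `ROOT_DIR` and the surrounding `main()` are assumptions taken from the hunk's context:

```python
import os
from pathlib import Path

ROOT_DIR = Path(".")  # assumed: the repository root in the real script

dirs = [
    dir_
    for dir_ in os.listdir(ROOT_DIR / "libs")
    if dir_ not in ("cli", "partners", "packages.yml")
    # New guard: skip any entry that is not a Python package directory.
    and "pyproject.toml" in os.listdir(ROOT_DIR / "libs" / dir_)
]
```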
Docs page listing arXiv references (file name not shown in this view):

```diff
@@ -48,7 +48,7 @@ From the opposite direction, scientists use `LangChain` in research and referenc
 | `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022‑05‑25 | `API:` [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
 | `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022‑03‑15 | `Docs:` [docs/tutorials/sql_qa](https://python.langchain.com/docs/tutorials/sql_qa), `API:` [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
 | `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022‑02‑01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
-| `2112.01488v3` [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](http://arxiv.org/abs/2112.01488v3) | Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al. | 2021‑12‑02 | `Docs:` [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/docs/integrations/providers/dspy)
+| `2112.01488v3` [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](http://arxiv.org/abs/2112.01488v3) | Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al. | 2021‑12‑02 | `Docs:` [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts)
 | `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021‑02‑26 | `API:` [langchain_experimental.open_clip](https://python.langchain.com/api_reference/experimental/open_clip.html)
 | `2005.14165v4` [Language Models are Few-Shot Learners](http://arxiv.org/abs/2005.14165v4) | Tom B. Brown, Benjamin Mann, Nick Ryder, et al. | 2020‑05‑28 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
 | `2005.11401v4` [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](http://arxiv.org/abs/2005.11401v4) | Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al. | 2020‑05‑22 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
@@ -970,7 +970,7 @@ reducing degenerate repetitions.
 - **arXiv id:** [2112.01488v3](http://arxiv.org/abs/2112.01488v3) **Published Date:** 2021-12-02
 - **LangChain:**
 
-   - **Documentation:** [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/docs/integrations/providers/dspy)
+   - **Documentation:** [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts)
 
 **Abstract:** Neural information retrieval (IR) has greatly advanced search and other
 knowledge-intensive language tasks. While many neural IR methods encode queries
```
Docs concepts page on retrievers (file name not shown in this view):

````diff
@@ -32,7 +32,7 @@ The only requirement for a retriever is the ability to accepts a query and retur
 In particular, [LangChain's retriever class](https://python.langchain.com/api_reference/core/retrievers/langchain_core.retrievers.BaseRetriever.html#) only requires that the `_get_relevant_documents` method is implemented, which takes a `query: str` and returns a list of [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) objects that are most relevant to the query.
 The underlying logic used to get relevant documents is specified by the retriever and can be whatever is most useful for the application.
 
-A LangChain retriever is a [runnable](/docs/how_to/lcel_cheatsheet/), which is a standard interface is for LangChain components.
+A LangChain retriever is a [runnable](/docs/how_to/lcel_cheatsheet/), which is a standard interface for LangChain components.
 This means that it has a few common methods, including `invoke`, that are used to interact with it. A retriever can be invoked with a query:
 
 ```python
````
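The hunk above is cut off just before the page's own invocation snippet. For context, a minimal sketch of the `invoke` pattern that sentence describes; the retriever class and query here are illustrative stand-ins, not the page's actual example:

```python
from typing import List

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever


class ToyRetriever(BaseRetriever):
    """Stand-in retriever; a real one would query a vector store or search index."""

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        # The underlying logic can be whatever is most useful for the application.
        return [Document(page_content=f"stub result for: {query}")]


retriever = ToyRetriever()
docs = retriever.invoke("What did the president say?")  # standard runnable method
```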
Docs concepts page on tools (file name not shown in this view):

````diff
@@ -192,7 +192,7 @@ All Toolkits expose a `get_tools` method which returns a list of tools. You can
 
 ```python
 # Initialize a toolkit
-toolkit = ExampleTookit(...)
+toolkit = ExampleToolkit(...)
 
 # Get list of tools
 tools = toolkit.get_tools()
 
````
Jupyter notebook on custom tools (file name not shown in this view):

```diff
@@ -530,7 +530,7 @@
     "\n",
     "    def _run(\n",
     "        self, a: int, b: int, run_manager: Optional[CallbackManagerForToolRun] = None\n",
-    "    ) -> str:\n",
+    "    ) -> int:\n",
     "        \"\"\"Use the tool.\"\"\"\n",
     "        return a * b\n",
     "\n",
@@ -539,7 +539,7 @@
     "        a: int,\n",
     "        b: int,\n",
     "        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,\n",
-    "    ) -> str:\n",
+    "    ) -> int:\n",
     "        \"\"\"Use the tool asynchronously.\"\"\"\n",
     "        # If the calculation is cheap, you can just delegate to the sync implementation\n",
     "        # as shown below.\n",
```
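Both hunks correct the return type annotation of a multiplication tool from `str` to `int` so the annotation matches what the method actually returns. A sketch of the corrected tool assembled from these fragments; the class name, `args_schema`, and field values are assumptions, since the diff shows only the `_run`/`_arun` bodies:

```python
from typing import Optional, Type

from langchain_core.callbacks import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field


class CalculatorInput(BaseModel):
    a: int = Field(description="first number")
    b: int = Field(description="second number")


class CustomCalculatorTool(BaseTool):
    name: str = "Calculator"
    description: str = "useful for when you need to answer questions about math"
    args_schema: Type[BaseModel] = CalculatorInput

    def _run(
        self, a: int, b: int, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> int:  # was `-> str`; the tool returns an int
        """Use the tool."""
        return a * b

    async def _arun(
        self,
        a: int,
        b: int,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> int:  # was `-> str`
        """Use the tool asynchronously."""
        # If the calculation is cheap, you can just delegate to the sync implementation
        # as shown below.
        return self._run(a, b, run_manager=run_manager.get_sync() if run_manager else None)
```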
Jupyter notebook on custom document loaders (file name not shown in this view):

```diff
@@ -67,9 +67,34 @@
     "When implementing a document loader do **NOT** provide parameters via the `lazy_load` or `alazy_load` methods.\n",
     "\n",
     "All configuration is expected to be passed through the initializer (__init__). This was a design choice made by LangChain to make sure that once a document loader has been instantiated it has all the information needed to load documents.\n",
     ":::\n",
     "\n",
     ":::"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "520edbbabde7df6e",
+   "metadata": {},
+   "source": [
+    "### Installation\n",
+    "\n",
+    "Install **langchain-core** and **langchain_community**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "936bd5fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -qU langchain_core langchain_community"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "a93f17a87d323bdd",
    "metadata": {},
    "source": [
     "### Implementation\n",
     "\n",
     "Let's create an example of a standard document loader that loads a file and creates a document from each line in the file."
@@ -77,9 +102,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "id": "20f128c1-1a2c-43b9-9e7b-cf9b3a86d1db",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:56.764714Z",
+     "start_time": "2025-04-21T08:49:56.623508Z"
+    },
     "tags": []
    },
    "outputs": [],
@@ -122,7 +151,7 @@
     "        self,\n",
     "    ) -> AsyncIterator[Document]:  # <-- Does not take any arguments\n",
     "        \"\"\"An async lazy loader that reads a file line by line.\"\"\"\n",
-    "        # Requires aiofiles (install with pip)\n",
+    "        # Requires aiofiles\n",
     "        # https://github.com/Tinche/aiofiles\n",
     "        import aiofiles\n",
     "\n",
@@ -151,9 +180,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "id": "b1751198-c6dd-4149-95bd-6370ce8fa06f",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:56.776521Z",
+     "start_time": "2025-04-21T08:49:56.773511Z"
+    },
     "tags": []
    },
    "outputs": [],
@@ -167,9 +200,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
+   "id": "c5210428",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -q aiofiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
    "id": "71ef1482-f9de-4852-b5a4-0938f350612e",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:57.972675Z",
+     "start_time": "2025-04-21T08:49:57.969411Z"
+    },
     "tags": []
    },
    "outputs": [
@@ -179,10 +226,12 @@
      "text": [
       "\n",
       "<class 'langchain_core.documents.base.Document'>\n",
-      "page_content='meow meow🐱 \\n' metadata={'line_number': 0, 'source': './meow.txt'}\n",
+      "page_content='meow meow🐱 \n",
+      "' metadata={'line_number': 0, 'source': './meow.txt'}\n",
       "\n",
       "<class 'langchain_core.documents.base.Document'>\n",
-      "page_content=' meow meow🐱 \\n' metadata={'line_number': 1, 'source': './meow.txt'}\n",
+      "page_content=' meow meow🐱 \n",
+      "' metadata={'line_number': 1, 'source': './meow.txt'}\n",
       "\n",
       "<class 'langchain_core.documents.base.Document'>\n",
       "page_content=' meow😻😻' metadata={'line_number': 2, 'source': './meow.txt'}\n"
@@ -199,9 +248,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
    "id": "1588e78c-e81a-4d40-b36c-634242c84a6a",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.028989Z",
+     "start_time": "2025-04-21T08:49:58.021972Z"
+    },
     "tags": []
    },
    "outputs": [
@@ -211,10 +264,12 @@
      "text": [
       "\n",
       "<class 'langchain_core.documents.base.Document'>\n",
-      "page_content='meow meow🐱 \\n' metadata={'line_number': 0, 'source': './meow.txt'}\n",
+      "page_content='meow meow🐱 \n",
+      "' metadata={'line_number': 0, 'source': './meow.txt'}\n",
       "\n",
       "<class 'langchain_core.documents.base.Document'>\n",
-      "page_content=' meow meow🐱 \\n' metadata={'line_number': 1, 'source': './meow.txt'}\n",
+      "page_content=' meow meow🐱 \n",
+      "' metadata={'line_number': 1, 'source': './meow.txt'}\n",
       "\n",
       "<class 'langchain_core.documents.base.Document'>\n",
       "page_content=' meow😻😻' metadata={'line_number': 2, 'source': './meow.txt'}\n"
@@ -245,21 +300,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "df5ad46a-9e00-4073-8505-489fc4f3799e",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.078111Z",
+     "start_time": "2025-04-21T08:49:58.071421Z"
+    },
     "tags": []
    },
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[Document(page_content='meow meow🐱 \\n', metadata={'line_number': 0, 'source': './meow.txt'}),\n",
-       " Document(page_content=' meow meow🐱 \\n', metadata={'line_number': 1, 'source': './meow.txt'}),\n",
-       " Document(page_content=' meow😻😻', metadata={'line_number': 2, 'source': './meow.txt'})]"
+       "[Document(metadata={'line_number': 0, 'source': './meow.txt'}, page_content='meow meow🐱 \\n'),\n",
+       " Document(metadata={'line_number': 1, 'source': './meow.txt'}, page_content=' meow meow🐱 \\n'),\n",
+       " Document(metadata={'line_number': 2, 'source': './meow.txt'}, page_content=' meow😻😻')]"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -286,9 +345,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "209f6a91-2f15-4cb2-9237-f79fc9493b82",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.124363Z",
+     "start_time": "2025-04-21T08:49:58.120782Z"
+    },
     "tags": []
    },
    "outputs": [],
@@ -313,9 +376,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "id": "b1275c59-06d4-458f-abd2-fcbad0bde442",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.172506Z",
+     "start_time": "2025-04-21T08:49:58.167416Z"
+    },
     "tags": []
    },
    "outputs": [],
@@ -326,21 +393,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 10,
    "id": "56a3d707-2086-413b-ae82-50e92ddb27f6",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.218426Z",
+     "start_time": "2025-04-21T08:49:58.214684Z"
+    },
     "tags": []
    },
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[Document(page_content='meow meow🐱 \\n', metadata={'line_number': 1, 'source': './meow.txt'}),\n",
-       " Document(page_content=' meow meow🐱 \\n', metadata={'line_number': 2, 'source': './meow.txt'}),\n",
-       " Document(page_content=' meow😻😻', metadata={'line_number': 3, 'source': './meow.txt'})]"
+       "[Document(metadata={'line_number': 1, 'source': './meow.txt'}, page_content='meow meow🐱 \\n'),\n",
+       " Document(metadata={'line_number': 2, 'source': './meow.txt'}, page_content=' meow meow🐱 \\n'),\n",
+       " Document(metadata={'line_number': 3, 'source': './meow.txt'}, page_content=' meow😻😻')]"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -359,20 +430,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
    "id": "20d03092-ba35-47d7-b612-9d1631c261cd",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.267755Z",
+     "start_time": "2025-04-21T08:49:58.264369Z"
+    },
     "tags": []
    },
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[Document(page_content='some data from memory\\n', metadata={'line_number': 1, 'source': None}),\n",
-       " Document(page_content='meow', metadata={'line_number': 2, 'source': None})]"
+       "[Document(metadata={'line_number': 1, 'source': None}, page_content='some data from memory\\n'),\n",
+       " Document(metadata={'line_number': 2, 'source': None}, page_content='meow')]"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -394,9 +469,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 12,
    "id": "a9e92e0e-c8da-401c-b8c6-f0676004cf58",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.330432Z",
+     "start_time": "2025-04-21T08:49:58.327223Z"
+    },
     "tags": []
    },
    "outputs": [],
@@ -406,9 +485,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 13,
    "id": "6b559d30-8b0c-4e45-86b1-e4602d9aaa7e",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.383905Z",
+     "start_time": "2025-04-21T08:49:58.380658Z"
+    },
     "tags": []
    },
    "outputs": [
@@ -418,7 +501,7 @@
       "'utf-8'"
      ]
     },
-    "execution_count": 11,
+    "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -429,9 +512,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 14,
    "id": "2f7b145a-9c6f-47f9-9487-1f4b25aff46f",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.443829Z",
+     "start_time": "2025-04-21T08:49:58.440222Z"
+    },
     "tags": []
    },
    "outputs": [
@@ -441,7 +528,7 @@
       "b'meow meow\\xf0\\x9f\\x90\\xb1 \\n meow meow\\xf0\\x9f\\x90\\xb1 \\n meow\\xf0\\x9f\\x98\\xbb\\xf0\\x9f\\x98\\xbb'"
      ]
     },
-    "execution_count": 12,
+    "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -452,9 +539,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 15,
    "id": "9b9482fa-c49c-42cd-a2ef-80bc93214631",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.498609Z",
+     "start_time": "2025-04-21T08:49:58.494903Z"
+    },
     "tags": []
    },
    "outputs": [
@@ -464,7 +555,7 @@
       "'meow meow🐱 \\n meow meow🐱 \\n meow😻😻'"
      ]
     },
-    "execution_count": 13,
+    "execution_count": 15,
    "metadata": {},
     "output_type": "execute_result"
    }
@@ -475,19 +566,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 16,
    "id": "04cc7a81-290e-4ef8-b7e1-d885fcc59ece",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.551353Z",
+     "start_time": "2025-04-21T08:49:58.547518Z"
+    },
     "tags": []
    },
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<contextlib._GeneratorContextManager at 0x743f34324450>"
+       "<contextlib._GeneratorContextManager at 0x74b8d42e9940>"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -498,9 +593,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 17,
    "id": "ec8de0ab-51d7-4e41-82c9-3ce0a6fdc2cd",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.599576Z",
+     "start_time": "2025-04-21T08:49:58.596567Z"
+    },
     "tags": []
    },
    "outputs": [
@@ -510,7 +609,7 @@
       "{'foo': 'bar'}"
      ]
     },
-    "execution_count": 15,
+    "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -521,9 +620,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 18,
    "id": "19eae991-ae48-43c2-8952-7347cdb76a34",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.649634Z",
+     "start_time": "2025-04-21T08:49:58.646313Z"
+    },
     "tags": []
    },
    "outputs": [
@@ -533,7 +636,7 @@
       "'./meow.txt'"
      ]
     },
-    "execution_count": 16,
+    "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -551,65 +654,50 @@
     "\n",
     "While a parser encapsulates the logic needed to parse binary data into documents, *blob loaders* encapsulate the logic that's necessary to load blobs from a given storage location.\n",
     "\n",
-    "At the moment, `LangChain` only supports `FileSystemBlobLoader`.\n",
+    "At the moment, `LangChain` supports `FileSystemBlobLoader` and `CloudBlobLoader`.\n",
     "\n",
     "You can use the `FileSystemBlobLoader` to load blobs and then use the parser to parse them."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 19,
    "id": "c093becb-2e84-4329-89e3-956a3bd765e5",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-21T08:49:58.718259Z",
+     "start_time": "2025-04-21T08:49:58.705367Z"
+    },
     "tags": []
    },
    "outputs": [],
    "source": [
     "from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader\n",
     "\n",
-    "blob_loader = FileSystemBlobLoader(path=\".\", glob=\"*.mdx\", show_progress=True)"
+    "filesystem_blob_loader = FileSystemBlobLoader(\n",
+    "    path=\".\", glob=\"*.mdx\", show_progress=True\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
-   "id": "77739dab-2a1e-4b64-8daa-fee8aa029972",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "45e85d3f63224bb59db02a40ae2e3268",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/8 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "page_content='# Microsoft Office\\n' metadata={'line_number': 1, 'source': 'office_file.mdx'}\n",
-      "page_content='# Markdown\\n' metadata={'line_number': 1, 'source': 'markdown.mdx'}\n",
-      "page_content='# JSON\\n' metadata={'line_number': 1, 'source': 'json.mdx'}\n",
-      "page_content='---\\n' metadata={'line_number': 1, 'source': 'pdf.mdx'}\n",
-      "page_content='---\\n' metadata={'line_number': 1, 'source': 'index.mdx'}\n",
-      "page_content='# File Directory\\n' metadata={'line_number': 1, 'source': 'file_directory.mdx'}\n",
-      "page_content='# CSV\\n' metadata={'line_number': 1, 'source': 'csv.mdx'}\n",
-      "page_content='# HTML\\n' metadata={'line_number': 1, 'source': 'html.mdx'}\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "id": "21b91bad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -q tqdm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40be670b",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "parser = MyParser()\n",
-    "for blob in blob_loader.yield_blobs():\n",
+    "for blob in filesystem_blob_loader.yield_blobs():\n",
     "    for doc in parser.lazy_parse(blob):\n",
     "        print(doc)\n",
     "        break"
@@ -620,56 +708,104 @@
|
||||
"id": "f016390c-d38b-4261-946d-34eefe546df7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generic Loader\n",
|
||||
"\n",
|
||||
"LangChain has a `GenericLoader` abstraction which composes a `BlobLoader` with a `BaseBlobParser`.\n",
|
||||
"\n",
|
||||
"`GenericLoader` is meant to provide standardized classmethods that make it easy to use existing `BlobLoader` implementations. At the moment, only the `FileSystemBlobLoader` is supported."
|
||||
"Or, you can use `CloudBlobLoader` to load blobs from a cloud storage location (Supports s3://, az://, gs://, file:// schemes)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "1de74daf-70ee-4616-9089-d28e26b16851",
|
||||
"execution_count": null,
|
||||
"id": "8210714e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -q 'cloudpathlib[s3]'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d3f84501-b0aa-4a60-aad2-5109cbd37d4f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```python\n",
|
||||
"from cloudpathlib import S3Client, S3Path\n",
|
||||
"from langchain_community.document_loaders.blob_loaders import CloudBlobLoader\n",
|
||||
"\n",
|
||||
"client = S3Client(no_sign_request=True)\n",
|
||||
"client.set_as_default_client()\n",
|
||||
"\n",
|
||||
"path = S3Path(\n",
|
||||
" \"s3://bucket-01\", client=client\n",
|
||||
") # Supports s3://, az://, gs://, file:// schemes.\n",
|
||||
"\n",
|
||||
"cloud_loader = CloudBlobLoader(path, glob=\"**/*.pdf\", show_progress=True)\n",
|
||||
"\n",
|
||||
"for blob in cloud_loader.yield_blobs():\n",
|
||||
" print(blob)\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "40c361ba4cd30164",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generic Loader\n",
|
||||
"\n",
|
||||
"LangChain has a `GenericLoader` abstraction which composes a `BlobLoader` with a `BaseBlobParser`.\n",
|
||||
"\n",
|
||||
"`GenericLoader` is meant to provide standardized classmethods that make it easy to use existing `BlobLoader` implementations. At the moment, the `FileSystemBlobLoader` and `CloudBlobLoader` are supported. See example below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "5dfb2be02fe662c5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-21T08:50:16.244917Z",
|
||||
"start_time": "2025-04-21T08:50:15.527562Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5f1f6810a71a4909ac9fe1e8f8cb9e0a",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/8 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 7/7 [00:00<00:00, 1224.82it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='# Microsoft Office\\n' metadata={'line_number': 1, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='\\n' metadata={'line_number': 2, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='>[The Microsoft Office](https://www.office.com/) suite of productivity software includes Microsoft Word, Microsoft Excel, Microsoft PowerPoint, Microsoft Outlook, and Microsoft OneNote. It is available for Microsoft Windows and macOS operating systems. It is also available on Android and iOS.\\n' metadata={'line_number': 3, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='\\n' metadata={'line_number': 4, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='This covers how to load commonly used file formats including `DOCX`, `XLSX` and `PPTX` documents into a document format that we can use downstream.\\n' metadata={'line_number': 5, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='# Text embedding models\n",
|
||||
"' metadata={'line_number': 1, 'source': 'embed_text.mdx'}\n",
|
||||
"page_content='\n",
|
||||
"' metadata={'line_number': 2, 'source': 'embed_text.mdx'}\n",
|
||||
"page_content=':::info\n",
|
||||
"' metadata={'line_number': 3, 'source': 'embed_text.mdx'}\n",
|
||||
"page_content='Head to [Integrations](/docs/integrations/text_embedding/) for documentation on built-in integrations with text embedding model providers.\n",
|
||||
"' metadata={'line_number': 4, 'source': 'embed_text.mdx'}\n",
|
||||
"page_content=':::\n",
|
||||
"' metadata={'line_number': 5, 'source': 'embed_text.mdx'}\n",
|
||||
"... output truncated for demo purposes\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.generic import GenericLoader\n",
|
||||
"\n",
|
||||
"loader = GenericLoader.from_filesystem(\n",
|
||||
" path=\".\", glob=\"*.mdx\", show_progress=True, parser=MyParser()\n",
|
||||
"generic_loader_filesystem = GenericLoader(\n",
|
||||
" blob_loader=filesystem_blob_loader, blob_parser=parser\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for idx, doc in enumerate(loader.lazy_load()):\n",
|
||||
"for idx, doc in enumerate(generic_loader_filesystem.lazy_load()):\n",
|
||||
" if idx < 5:\n",
|
||||
" print(doc)\n",
|
||||
"\n",
|
||||
@@ -690,9 +826,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 28,
|
||||
"id": "23633102-dc44-4fed-a4e1-8159489101c8",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-21T08:50:34.841862Z",
|
||||
"start_time": "2025-04-21T08:50:34.838375Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -709,37 +849,46 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 29,
|
||||
"id": "dc95be85-4a29-4c6f-a260-08afa3c95538",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-21T08:50:34.901734Z",
|
||||
"start_time": "2025-04-21T08:50:34.888098Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "4320598ea3b44a52b1873e1c801db312",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/8 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 7/7 [00:00<00:00, 814.86it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='# Microsoft Office\\n' metadata={'line_number': 1, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='\\n' metadata={'line_number': 2, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='>[The Microsoft Office](https://www.office.com/) suite of productivity software includes Microsoft Word, Microsoft Excel, Microsoft PowerPoint, Microsoft Outlook, and Microsoft OneNote. It is available for Microsoft Windows and macOS operating systems. It is also available on Android and iOS.\\n' metadata={'line_number': 3, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='\\n' metadata={'line_number': 4, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='This covers how to load commonly used file formats including `DOCX`, `XLSX` and `PPTX` documents into a document format that we can use downstream.\\n' metadata={'line_number': 5, 'source': 'office_file.mdx'}\n",
|
||||
"page_content='# Text embedding models\n",
|
||||
"' metadata={'line_number': 1, 'source': 'embed_text.mdx'}\n",
|
||||
"page_content='\n",
|
||||
"' metadata={'line_number': 2, 'source': 'embed_text.mdx'}\n",
|
||||
"page_content=':::info\n",
|
||||
"' metadata={'line_number': 3, 'source': 'embed_text.mdx'}\n",
|
||||
"page_content='Head to [Integrations](/docs/integrations/text_embedding/) for documentation on built-in integrations with text embedding model providers.\n",
|
||||
"' metadata={'line_number': 4, 'source': 'embed_text.mdx'}\n",
|
||||
"page_content=':::\n",
|
||||
"' metadata={'line_number': 5, 'source': 'embed_text.mdx'}\n",
|
||||
"... output truncated for demo purposes\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -769,7 +918,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
"\n",
|
||||
"LangChain integrates with a host of PDF parsers. Some are simple and relatively low-level; others will support OCR and image-processing, or perform advanced document layout analysis. The right choice will depend on your needs. Below we enumerate the possibilities.\n",
|
||||
"\n",
|
||||
"We will demonstrate these approaches on a [sample file](https://github.com/langchain-ai/langchain/blob/master/libs/community/tests/integration_tests/examples/layout-parser-paper.pdf):"
|
||||
"We will demonstrate these approaches on a [sample file](https://github.com/langchain-ai/langchain-community/blob/main/libs/community/tests/examples/layout-parser-paper.pdf):"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -167,7 +167,7 @@
|
||||
"She was, in 1906, the first woman to become a professor at the University of Paris.\n",
|
||||
"\"\"\"\n",
|
||||
"documents = [Document(page_content=text)]\n",
|
||||
"graph_documents = llm_transformer.convert_to_graph_documents(documents)\n",
|
||||
"graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)\n",
|
||||
"print(f\"Nodes:{graph_documents[0].nodes}\")\n",
|
||||
"print(f\"Relationships:{graph_documents[0].relationships}\")"
|
||||
]
|
||||
@@ -205,7 +205,7 @@
|
||||
" allowed_nodes=[\"Person\", \"Country\", \"Organization\"],\n",
|
||||
" allowed_relationships=[\"NATIONALITY\", \"LOCATED_IN\", \"WORKED_AT\", \"SPOUSE\"],\n",
|
||||
")\n",
|
||||
"graph_documents_filtered = llm_transformer_filtered.convert_to_graph_documents(\n",
|
||||
"graph_documents_filtered = await llm_transformer_filtered.aconvert_to_graph_documents(\n",
|
||||
" documents\n",
|
||||
")\n",
|
||||
"print(f\"Nodes:{graph_documents_filtered[0].nodes}\")\n",
|
||||
@@ -245,7 +245,9 @@
|
||||
" allowed_nodes=[\"Person\", \"Country\", \"Organization\"],\n",
|
||||
" allowed_relationships=allowed_relationships,\n",
|
||||
")\n",
|
||||
"graph_documents_filtered = llm_transformer_tuple.convert_to_graph_documents(documents)\n",
|
||||
"graph_documents_filtered = await llm_transformer_tuple.aconvert_to_graph_documents(\n",
|
||||
" documents\n",
|
||||
")\n",
|
||||
"print(f\"Nodes:{graph_documents_filtered[0].nodes}\")\n",
|
||||
"print(f\"Relationships:{graph_documents_filtered[0].relationships}\")"
|
||||
]
|
||||
@@ -289,7 +291,9 @@
|
||||
" allowed_relationships=[\"NATIONALITY\", \"LOCATED_IN\", \"WORKED_AT\", \"SPOUSE\"],\n",
|
||||
" node_properties=[\"born_year\"],\n",
|
||||
")\n",
|
||||
"graph_documents_props = llm_transformer_props.convert_to_graph_documents(documents)\n",
|
||||
"graph_documents_props = await llm_transformer_props.aconvert_to_graph_documents(\n",
|
||||
" documents\n",
|
||||
")\n",
|
||||
"print(f\"Nodes:{graph_documents_props[0].nodes}\")\n",
|
||||
"print(f\"Relationships:{graph_documents_props[0].relationships}\")"
|
||||
]
|
||||
|
||||
@@ -100,7 +100,7 @@
|
||||
"id": "8554bae5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"A chat prompt is made up a of a list of messages. Similarly to the above example, we can concatenate chat prompt templates. Each new element is a new message in the final prompt.\n",
|
||||
"A chat prompt is made up of a list of messages. Similarly to the above example, we can concatenate chat prompt templates. Each new element is a new message in the final prompt.\n",
|
||||
"\n",
|
||||
"First, let's initialize the a [`ChatPromptTemplate`](https://python.langchain.com/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html) with a [`SystemMessage`](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.system.SystemMessage.html)."
|
||||
]
|
||||
|
||||
@@ -162,7 +162,7 @@
|
||||
"\n",
|
||||
"table_chain = prompt | llm_with_tools | output_parser\n",
|
||||
"\n",
|
||||
"table_chain.invoke({\"input\": \"What are all the genres of Alanis Morisette songs\"})"
|
||||
"table_chain.invoke({\"input\": \"What are all the genres of Alanis Morissette songs\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -206,7 +206,7 @@
|
||||
")\n",
|
||||
"\n",
|
||||
"category_chain = prompt | llm_with_tools | output_parser\n",
|
||||
"category_chain.invoke({\"input\": \"What are all the genres of Alanis Morisette songs\"})"
|
||||
"category_chain.invoke({\"input\": \"What are all the genres of Alanis Morissette songs\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -261,7 +261,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"table_chain = category_chain | get_tables\n",
|
||||
"table_chain.invoke({\"input\": \"What are all the genres of Alanis Morisette songs\"})"
|
||||
"table_chain.invoke({\"input\": \"What are all the genres of Alanis Morissette songs\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -313,7 +313,7 @@
|
||||
],
|
||||
"source": [
|
||||
"query = full_chain.invoke(\n",
|
||||
" {\"question\": \"What are all the genres of Alanis Morisette songs\"}\n",
|
||||
" {\"question\": \"What are all the genres of Alanis Morissette songs\"}\n",
|
||||
")\n",
|
||||
"print(query)"
|
||||
]
|
||||
@@ -346,7 +346,7 @@
|
||||
"source": [
|
||||
"We can see the LangSmith trace for this run [here](https://smith.langchain.com/public/4fbad408-3554-4f33-ab47-1e510a1b52a3/r).\n",
|
||||
"\n",
|
||||
"We've seen how to dynamically include a subset of table schemas in a prompt within a chain. Another possible approach to this problem is to let an Agent decide for itself when to look up tables by giving it a Tool to do so. You can see an example of this in the [SQL: Agents](/docs/tutorials/agents) guide."
|
||||
"We've seen how to dynamically include a subset of table schemas in a prompt within a chain. Another possible approach to this problem is to let an Agent decide for itself when to look up tables by giving it a Tool to do so. You can see an example of this in the [SQL: Agents](/docs/tutorials/sql_qa/#agents) guide."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -555,7 +555,7 @@
|
||||
"source": [
|
||||
"We can see that with retrieval we're able to correct the spelling from \"Elenis Moriset\" to \"Alanis Morissette\" and get back a valid result.\n",
|
||||
"\n",
|
||||
"Another possible approach to this problem is to let an Agent decide for itself when to look up proper nouns. You can see an example of this in the [SQL: Agents](/docs/tutorials/agents) guide."
|
||||
"Another possible approach to this problem is to let an Agent decide for itself when to look up proper nouns. You can see an example of this in the [SQL: Agents](/docs/tutorials/sql_qa/#agents) guide."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
"source": [
|
||||
"# How to disable parallel tool calling\n",
|
||||
"\n",
|
||||
":::info OpenAI-specific\n",
|
||||
":::info Provider-specific\n",
|
||||
"\n",
|
||||
"This API is currently only supported by OpenAI.\n",
|
||||
"This API is currently only supported by OpenAI and Anthropic.\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
@@ -55,12 +55,12 @@
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"\n",
|
||||
"if \"OPENAI_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0)"
|
||||
"llm = init_chat_model(\"openai:gpt-4.1-mini\")"
|
||||
]
|
||||
},
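The hunk above only updates the model setup; the disabling step itself falls outside this excerpt. As a minimal sketch of the pattern this guide documents — assuming the `parallel_tool_calls` keyword of `bind_tools`, which OpenAI-style chat models accept — it might look like:

```python
from langchain.chat_models import init_chat_model


def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


llm = init_chat_model("openai:gpt-4.1-mini")
# parallel_tool_calls=False caps the model at one tool call per turn
llm_with_tools = llm.bind_tools([add, multiply], parallel_tool_calls=False)
result = llm_with_tools.invoke("Please call the first tool two times")
print(len(result.tool_calls))  # expected: 1, even though two calls were requested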
|
||||
{
|
||||
@@ -121,7 +121,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -83,21 +83,28 @@ agent_executor.run("how many letters in the word educa?", callbacks=[handler])
|
||||
Another example:
|
||||
|
||||
```python
|
||||
from langchain.agents import load_tools, initialize_agent, AgentType
|
||||
from langchain_openai import OpenAI
|
||||
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler
|
||||
import os
|
||||
|
||||
from langchain_community.agent_toolkits.load_tools import load_tools
|
||||
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langgraph.prebuilt import create_react_agent
|
||||
|
||||
os.environ["LLMONITOR_APP_ID"] = ""
|
||||
os.environ["OPENAI_API_KEY"] = ""
|
||||
os.environ["SERPAPI_API_KEY"] = ""
|
||||
|
||||
handler = LLMonitorCallbackHandler()
|
||||
|
||||
llm = OpenAI(temperature=0)
|
||||
llm = ChatOpenAI(temperature=0, callbacks=[handler])
|
||||
tools = load_tools(["serpapi", "llm-math"], llm=llm)
|
||||
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, metadata={ "agent_name": "GirlfriendAgeFinder" }) # <- recommended, assign a custom name
|
||||
agent = create_react_agent("openai:gpt-4.1-mini", tools)
|
||||
|
||||
agent.run(
|
||||
"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?",
|
||||
callbacks=[handler],
|
||||
)
|
||||
input_message = {
|
||||
"role": "user",
|
||||
"content": "What's the weather in SF?",
|
||||
}
|
||||
|
||||
agent.invoke({"messages": [input_message]})
|
||||
```
|
||||
|
||||
## User Tracking
|
||||
@@ -110,7 +117,7 @@ with identify("user-123"):
|
||||
llm.invoke("Tell me a joke")
|
||||
|
||||
with identify("user-456", user_props={"email": "user456@test.com"}):
|
||||
agent.run("Who is Leo DiCaprio's girlfriend?")
|
||||
agent.invoke(...)
|
||||
```
|
||||
## Support
|
||||
|
||||
|
||||
File diff suppressed because it is too large
@@ -41,7 +41,7 @@
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Head to https://www.cloudflare.com/developer-platform/products/workers-ai/ to sign up to CloudflareWorkersAI and generate an API key. Once you've done this set the CF_API_KEY environment variable and the CF_ACCOUNT_ID environment variable:"
|
||||
"Head to https://www.cloudflare.com/developer-platform/products/workers-ai/ to sign up to CloudflareWorkersAI and generate an API key. Once you've done this set the CF_AI_API_KEY environment variable and the CF_ACCOUNT_ID environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -56,8 +56,8 @@
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"CF_API_KEY\"):\n",
|
||||
" os.environ[\"CF_API_KEY\"] = getpass.getpass(\n",
|
||||
"if not os.getenv(\"CF_AI_API_KEY\"):\n",
|
||||
" os.environ[\"CF_AI_API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter your CloudflareWorkersAI API key: \"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
|
||||
docs/docs/integrations/chat/featherless_ai.ipynb (new file, 308 lines)
@@ -0,0 +1,308 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "raw"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Featherless AI\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e49f1e0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatFeatherlessAi\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This will help you getting started with FeatherlessAi [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatFeatherlessAi features and configurations head to the [API reference](https://python.langchain.com/api_reference/__package_name_short_snake__/chat_models/__module_name__.chat_models.ChatFeatherlessAi.html).\n",
|
||||
"\n",
|
||||
"- See https://featherless.ai/ for an example.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/__package_name_short_snake__) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatFeatherlessAi](https://python.langchain.com/api_reference/__package_name_short_snake__/chat_models/__module_name__.chat_models.ChatFeatherlessAi.html) | [langchain-featherless-ai](https://python.langchain.com/api_reference/__package_name_short_snake__/) | ✅ | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ❌ | ❌ | ✅| ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"To access Featherless AI models you'll need to create a/an Featherless AI account, get an API key, and install the `langchain-featherless-ai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Head to https://featherless.ai/ to sign up to FeatherlessAI and generate an API key. Once you've done this set the FEATHERLESSAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "433e8d2b-9519-4b49-b2c4-7ab65b046c94",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"FEATHERLESSAI_API_KEY\"):\n",
|
||||
" os.environ[\"FEATHERLESSAI_API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter your FeatherlessAI API key: \"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "72ee0c4b-9764-423a-9dbf-95129e185210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain FeatherlessAi integration lives in the `langchain-featherless-ai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-featherless-ai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a38cde65-254d-4219-a441-068766c0d4b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_featherless_ai import ChatFeatherlessAi\n",
|
||||
"\n",
|
||||
"llm = ChatFeatherlessAi(\n",
|
||||
" model=\"featherless-ai/Qwerky-72B\",\n",
|
||||
" temperature=0.9,\n",
|
||||
" max_tokens=None,\n",
|
||||
" timeout=None,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b4f3e15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Python311\\Lib\\site-packages\\pydantic\\main.py:463: UserWarning: Pydantic serializer warnings:\n",
|
||||
" PydanticSerializationUnexpectedValue(Expected `int` - serialized value may not be as expected [input_value=1747322408.706, input_type=float])\n",
|
||||
" return self.__pydantic_serializer__.to_python(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'aime programmer.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 27, 'total_tokens': 32, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'featherless-ai/Qwerky-72B', 'system_fingerprint': '', 'id': 'G1sgui', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--6ecbe184-c94e-4d03-bf75-9bd85b04ba5b-0', usage_metadata={'input_tokens': 27, 'output_tokens': 5, 'total_tokens': 32, 'input_token_details': {}, 'output_token_details': {}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I love programming.\"),\n",
|
||||
"]\n",
|
||||
"ai_msg = llm.invoke(messages)\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"J'aime programmer.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(ai_msg.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fca9e713",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Python311\\Lib\\site-packages\\pydantic\\main.py:463: UserWarning: Pydantic serializer warnings:\n",
|
||||
" PydanticSerializationUnexpectedValue(Expected `int` - serialized value may not be as expected [input_value=1747322423.487, input_type=float])\n",
|
||||
" return self.__pydantic_serializer__.to_python(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe Programmieren.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 22, 'total_tokens': 27, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'featherless-ai/Qwerky-72B', 'system_fingerprint': '', 'id': 'BoBqht', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--67464357-83d1-4591-9a62-303ed74b8148-0', usage_metadata={'input_tokens': 22, 'output_tokens': 5, 'total_tokens': 27, 'input_token_details': {}, 'output_token_details': {}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatFeatherlessAi features and configurations head to the [API reference](https://python.langchain.com/api_reference/__package_name_short_snake__/chat_models/.chat_models.ChatFeatherlessAi.html)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,117 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"# GigaChat\n",
|
||||
"This notebook shows how to use LangChain with [GigaChat](https://developers.sber.ru/portal/products/gigachat).\n",
|
||||
"To use you need to install ```langchain_gigachat``` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-gigachat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"To get GigaChat credentials you need to [create account](https://developers.sber.ru/studio/login) and [get access to API](https://developers.sber.ru/docs/ru/gigachat/individuals-quickstart)\n",
|
||||
"\n",
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"if \"GIGACHAT_CREDENTIALS\" not in os.environ:\n",
|
||||
" os.environ[\"GIGACHAT_CREDENTIALS\"] = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_gigachat import GigaChat\n",
|
||||
"\n",
|
||||
"chat = GigaChat(verify_ssl_certs=False, scope=\"GIGACHAT_API_PERS\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The capital of Russia is Moscow.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(\n",
|
||||
" content=\"You are a helpful AI that shares everything you know. Talk in English.\"\n",
|
||||
" ),\n",
|
||||
" HumanMessage(content=\"What is capital of Russia?\"),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"print(chat.invoke(messages).content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -915,6 +915,175 @@
|
||||
"response_2.text()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "34ad0015-688c-4274-be55-93268b44f558",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Code interpreter\n",
|
||||
"\n",
|
||||
"OpenAI implements a [code interpreter](https://platform.openai.com/docs/guides/tools-code-interpreter) tool to support the sandboxed generation and execution of code.\n",
|
||||
"\n",
|
||||
"Example use:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "34826aae-6d48-4b84-bc00-89594a87d461",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"o4-mini\", use_responses_api=True)\n",
|
||||
"\n",
|
||||
"llm_with_tools = llm.bind_tools(\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"type\": \"code_interpreter\",\n",
|
||||
" # Create a new container\n",
|
||||
" \"container\": {\"type\": \"auto\"},\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"response = llm_with_tools.invoke(\n",
|
||||
" \"Write and run code to answer the question: what is 3^3?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b4d92b9-941f-4d54-93a5-b0c73afd66b2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note that the above command created a new container. We can also specify an existing container ID:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "d8c82895-5011-4062-a1bb-278ec91321e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool_outputs = response.additional_kwargs[\"tool_outputs\"]\n",
|
||||
"assert len(tool_outputs) == 1\n",
|
||||
"# highlight-next-line\n",
|
||||
"container_id = tool_outputs[0][\"container_id\"]\n",
|
||||
"\n",
|
||||
"llm_with_tools = llm.bind_tools(\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"type\": \"code_interpreter\",\n",
|
||||
" # Use an existing container\n",
|
||||
" # highlight-next-line\n",
|
||||
" \"container\": container_id,\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8db30501-522c-4915-963d-d60539b5c16e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Remote MCP\n",
|
||||
"\n",
|
||||
"OpenAI implements a [remote MCP](https://platform.openai.com/docs/guides/tools-remote-mcp) tool that allows for model-generated calls to MCP servers.\n",
|
||||
"\n",
|
||||
"Example use:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "7044a87b-8b99-49e8-8ca4-e2a8ae49f65a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"o4-mini\", use_responses_api=True)\n",
|
||||
"\n",
|
||||
"llm_with_tools = llm.bind_tools(\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"type\": \"mcp\",\n",
|
||||
" \"server_label\": \"deepwiki\",\n",
|
||||
" \"server_url\": \"https://mcp.deepwiki.com/mcp\",\n",
|
||||
" \"require_approval\": \"never\",\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"response = llm_with_tools.invoke(\n",
|
||||
" \"What transport protocols does the 2025-03-26 version of the MCP \"\n",
|
||||
" \"spec (modelcontextprotocol/modelcontextprotocol) support?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0ed7494e-425d-4bdf-ab83-3164757031dd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<details>\n",
|
||||
"<summary>MCP Approvals</summary>\n",
|
||||
"\n",
|
||||
"OpenAI will at times request approval before sharing data with a remote MCP server.\n",
|
||||
"\n",
|
||||
"In the above command, we instructed the model to never require approval. We can also configure the model to always request approval, or to always request approval for specific tools:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"llm_with_tools = llm.bind_tools(\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"type\": \"mcp\",\n",
|
||||
" \"server_label\": \"deepwiki\",\n",
|
||||
" \"server_url\": \"https://mcp.deepwiki.com/mcp\",\n",
|
||||
" \"require_approval\": {\n",
|
||||
" \"always\": {\n",
|
||||
" \"tool_names\": [\"read_wiki_structure\"]\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"response = llm_with_tools.invoke(\n",
|
||||
" \"What transport protocols does the 2025-03-26 version of the MCP \"\n",
|
||||
" \"spec (modelcontextprotocol/modelcontextprotocol) support?\"\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Responses may then include blocks with type `\"mcp_approval_request\"`.\n",
|
||||
"\n",
|
||||
"To submit approvals for an approval request, structure it into a content block in an input message:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"approval_message = {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"mcp_approval_response\",\n",
|
||||
" \"approve\": True,\n",
|
||||
" \"approval_request_id\": output[\"id\"],\n",
|
||||
" }\n",
|
||||
" for output in response.additional_kwargs[\"tool_outputs\"]\n",
|
||||
" if output[\"type\"] == \"mcp_approval_request\"\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"next_response = llm_with_tools.invoke(\n",
|
||||
" [approval_message],\n",
|
||||
" # continue existing thread\n",
|
||||
" previous_response_id=response.response_metadata[\"id\"]\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"</details>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fda05f0-4b81-4709-9407-f316d760ad50",
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API."
|
||||
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
|
||||
"> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
|
||||
"> through an easy-to-use JSON API."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -6,20 +6,54 @@
|
||||
"source": [
|
||||
"# Confluence\n",
|
||||
"\n",
|
||||
">[Confluence](https://www.atlassian.com/software/confluence) is a wiki collaboration platform that saves and organizes all of the project-related material. `Confluence` is a knowledge base that primarily handles content management activities. \n",
|
||||
"[Confluence](https://www.atlassian.com/software/confluence) is a wiki collaboration platform designed to save and organize all project-related materials. As a knowledge base, Confluence primarily serves content management activities.\n",
|
||||
"\n",
|
||||
"A loader for `Confluence` pages.\n",
|
||||
"This loader allows you to fetch and process Confluence pages into `Document` objects.\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"This currently supports `username/api_key`, `Oauth2 login`, `cookies`. Additionally, on-prem installations also support `token` authentication. \n",
|
||||
"## Authentication Methods\n",
|
||||
"\n",
|
||||
"The following authentication methods are supported:\n",
|
||||
"\n",
|
||||
"Specify a list `page_id`-s and/or `space_key` to load in the corresponding pages into Document objects, if both are specified the union of both sets will be returned.\n",
|
||||
"- `username/api_key`\n",
|
||||
"- `OAuth2 login`\n",
|
||||
"- `cookies`\n",
|
||||
"- On-premises installations: `token` authentication\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"You can also specify a boolean `include_attachments` to include attachments, this is set to False by default, if set to True all attachments will be downloaded and ConfluenceReader will extract the text from the attachments and add it to the Document object. Currently supported attachment types are: `PDF`, `PNG`, `JPEG/JPG`, `SVG`, `Word` and `Excel`.\n",
|
||||
"## Page Selection\n",
|
||||
"\n",
|
||||
"Hint: `space_key` and `page_id` can both be found in the URL of a page in Confluence - https://yoursite.atlassian.com/wiki/spaces/<space_key>/pages/<page_id>\n"
|
||||
"You can specify which pages to load using:\n",
|
||||
"\n",
|
||||
"- **page_ids** (*list*): \n",
|
||||
" A list of `page_id` values to load the corresponding pages.\n",
|
||||
"\n",
|
||||
"- **space_key** (*string*): \n",
|
||||
" A string of `space_key` value to load all pages within the specified confluence space.\n",
|
||||
"\n",
|
||||
"If both `page_ids` and `space_key` are provided, the loader will return the union of pages from both lists.\n",
|
||||
"\n",
|
||||
"*Hint:* Both `space_key` and `page_id` can be found in the URL of a Confluence page: \n",
|
||||
"`https://yoursite.atlassian.com/wiki/spaces/{space_key}/pages/{page_id}`\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Attachments\n",
|
||||
"\n",
|
||||
"You may include attachments in the loaded `Document` objects by setting the boolean parameter **include_attachments** to `True` (default: `False`). When enabled, all attachments are downloaded and their text content is extracted and added to the Document.\n",
|
||||
"\n",
|
||||
"**Currently supported attachment types:**\n",
|
||||
"\n",
|
||||
"- PDF (`.pdf`)\n",
|
||||
"- PNG (`.png`)\n",
|
||||
"- JPEG/JPG (`.jpeg`, `.jpg`)\n",
|
||||
"- SVG (`.svg`)\n",
|
||||
"- Word (`.doc`, `.docx`)\n",
|
||||
"- Excel (`.xls`, `.xlsx`)\n",
|
||||
"\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -70,9 +104,14 @@
|
||||
"from langchain_community.document_loaders import ConfluenceLoader\n",
|
||||
"\n",
|
||||
"loader = ConfluenceLoader(\n",
|
||||
" url=\"https://yoursite.atlassian.com/wiki\", username=\"me\", api_key=\"12345\"\n",
|
||||
" url=\"https://yoursite.atlassian.com/wiki\",\n",
|
||||
" username=\"<your-confluence-username>\",\n",
|
||||
" api_key=\"<your-api-token>\",\n",
|
||||
" space_key=\"<your-space-key>\",\n",
|
||||
" include_attachments=True,\n",
|
||||
" limit=50,\n",
|
||||
")\n",
|
||||
"documents = loader.load(space_key=\"SPACE\", include_attachments=True, limit=50)"
|
||||
"documents = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -95,10 +134,15 @@
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import ConfluenceLoader\n",
|
||||
"\n",
|
||||
"loader = ConfluenceLoader(url=\"https://yoursite.atlassian.com/wiki\", token=\"12345\")\n",
|
||||
"documents = loader.load(\n",
|
||||
" space_key=\"SPACE\", include_attachments=True, limit=50, max_pages=50\n",
|
||||
")"
|
||||
"loader = ConfluenceLoader(\n",
|
||||
" url=\"https://confluence.yoursite.com/\",\n",
|
||||
" token=\"<your-personal-access-token>\",\n",
|
||||
" space_key=\"<your-space-key>\",\n",
|
||||
" include_attachments=True,\n",
|
||||
" limit=50,\n",
|
||||
" max_pages=50,\n",
|
||||
")\n",
|
||||
"documents = loader.load()"
|
||||
]
|
||||
}
|
||||
],
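The hunks above only exercise `space_key` loading. For the `page_ids` path described in the rewritten markdown, a minimal sketch (all identifiers and credentials are placeholders):

```python
from langchain_community.document_loaders import ConfluenceLoader

# Placeholder IDs; both values appear in a page's URL:
# https://yoursite.atlassian.com/wiki/spaces/{space_key}/pages/{page_id}
loader = ConfluenceLoader(
    url="https://yoursite.atlassian.com/wiki",
    username="<your-confluence-username>",
    api_key="<your-api-token>",
    page_ids=["123456", "654321"],
)
# If space_key were also set, the union of both page sets would be returned.
documents = loader.load()
```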
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -upgrade --quiet langchain-google-firestore"
|
||||
"%pip install --upgrade --quiet langchain-google-firestore"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1214,9 +1214,7 @@
|
||||
"source": [
|
||||
"### Connecting to the DB\n",
|
||||
"\n",
|
||||
"The Cassandra caches shown in this page can be used with Cassandra as well as other derived databases, such as Astra DB, which use the CQL (Cassandra Query Language) protocol.\n",
|
||||
"\n",
|
||||
"> DataStax [Astra DB](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths.\n",
|
||||
"The Cassandra caches shown in this page can be used with Cassandra as well as other derived databases that can use the CQL (Cassandra Query Language) protocol, such as DataStax Astra DB.\n",
|
||||
"\n",
|
||||
"Depending on whether you connect to a Cassandra cluster or to Astra DB through CQL, you will provide different parameters when instantiating the cache (through initialization of a CassIO connection)."
|
||||
]
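For context, a sketch of the two CassIO initialization modes this paragraph refers to (credentials and addresses are placeholders):

```python
import cassio

# Astra DB through CQL: authenticate with a token and a database ID
cassio.init(
    token="AstraCS:<your-token>",
    database_id="<your-database-id>",
)

# ...or a self-managed Cassandra cluster: hand CassIO an existing driver session
# from cassandra.cluster import Cluster
# session = Cluster(["127.0.0.1"]).connect()
# cassio.init(session=session, keyspace="demo_keyspace")
```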
|
||||
@@ -1517,6 +1515,12 @@
|
||||
"source": [
|
||||
"You can easily use [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as an LLM cache, with either the \"exact\" or the \"semantic-based\" cache.\n",
|
||||
"\n",
|
||||
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
|
||||
"> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
|
||||
"> through an easy-to-use JSON API.\n",
|
||||
"\n",
|
||||
"_This approach differs from the `Cassandra` caches mentioned above in that it natively uses the HTTP Data API. The Data API is specific to Astra DB. Keep in mind that the storage format will also differ._\n",
|
||||
"\n",
|
||||
"Make sure you have a running database (it must be a Vector-enabled database to use the Semantic cache) and get the required credentials on your Astra dashboard:\n",
|
||||
"\n",
|
||||
"- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
|
||||
@@ -3160,7 +3164,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,118 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"# GigaChat\n",
|
||||
"This notebook shows how to use LangChain with [GigaChat](https://developers.sber.ru/portal/products/gigachat).\n",
|
||||
"To use you need to install ```gigachat``` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet gigachat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"To get GigaChat credentials you need to [create account](https://developers.sber.ru/studio/login) and [get access to API](https://developers.sber.ru/docs/ru/gigachat/individuals-quickstart)\n",
|
||||
"\n",
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"if \"GIGACHAT_CREDENTIALS\" not in os.environ:\n",
|
||||
" os.environ[\"GIGACHAT_CREDENTIALS\"] = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.llms import GigaChat\n",
|
||||
"\n",
|
||||
"llm = GigaChat(verify_ssl_certs=False, scope=\"GIGACHAT_API_PERS\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The capital of Russia is Moscow.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"What is capital of {country}?\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"generated = llm_chain.invoke(input={\"country\": \"Russia\"})\n",
|
||||
"print(generated[\"text\"])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
@@ -7,7 +7,9 @@
|
||||
"source": [
|
||||
"# Astra DB \n",
|
||||
"\n",
|
||||
"> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API.\n",
|
||||
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
|
||||
"> AI-ready database built on `Apache Cassandra®` and made conveniently availablev\n",
|
||||
"> through an easy-to-use JSON API.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use Astra DB to store chat message history."
|
||||
]
|
||||
|
||||
@@ -8,17 +8,19 @@
|
||||
Install the AVS Python SDK and AVS langchain vector store:
|
||||
|
||||
```bash
|
||||
pip install aerospike-vector-search langchain-community
|
||||
pip install aerospike-vector-search langchain-aerospike
|
||||
```
|
||||
|
||||
See the documentation for the Ptyhon SDK [here](https://aerospike-vector-search-python-client.readthedocs.io/en/latest/index.html).
|
||||
The documentation for the AVS langchain vector store is [here](https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.aerospike.Aerospike.html).
|
||||
See the documentation for the Python SDK [here](https://aerospike-vector-search-python-client.readthedocs.io/en/latest/index.html).
|
||||
The documentation for the AVS langchain vector store is [here](https://langchain-aerospike.readthedocs.io/en/latest/).
|
||||
|
||||
## Vector Store
|
||||
|
||||
To import this vectorstore:
|
||||
|
||||
```python
|
||||
from langchain_community.vectorstores import Aerospike
|
||||
from langchain_aerospike.vectorstores import Aerospike
|
||||
```
|
||||
|
||||
See a usage example [here](https://python.langchain.com/docs/integrations/vectorstores/aerospike/).
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# Astra DB
|
||||
|
||||
> [DataStax Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless
|
||||
> vector-capable database built on `Apache Cassandra®`and made conveniently available
|
||||
> through an easy-to-use JSON API.
|
||||
> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless AI-ready database built on `Apache Cassandra®` and made conveniently available through an easy-to-use JSON API.
|
||||
|
||||
See a [tutorial provided by DataStax](https://docs.datastax.com/en/astra/astra-db-vector/tutorials/chatbot.html).
|
||||
|
||||
@@ -10,19 +8,21 @@ See a [tutorial provided by DataStax](https://docs.datastax.com/en/astra/astra-d
|
||||
|
||||
Install the following Python package:
|
||||
```bash
|
||||
pip install "langchain-astradb>=0.1.0"
|
||||
pip install "langchain-astradb>=0.6,<0.7"
|
||||
```
|
||||
|
||||
Get the [connection secrets](https://docs.datastax.com/en/astra/astra-db-vector/get-started/quickstart.html).
|
||||
Set up the following environment variables:
|
||||
Create a database (if needed) and get the [connection secrets](https://docs.datastax.com/en/astra-db-serverless/get-started/quickstart.html#create-a-database-and-store-your-credentials).
|
||||
Set the following variables:
|
||||
|
||||
```python
|
||||
ASTRA_DB_APPLICATION_TOKEN="TOKEN"
|
||||
ASTRA_DB_API_ENDPOINT="API_ENDPOINT"
|
||||
ASTRA_DB_APPLICATION_TOKEN="TOKEN"
|
||||
```
|
||||
|
||||
## Vector Store

A few typical initialization patterns are shown here:

```python
from langchain_astradb import AstraDBVectorStore

@@ -32,8 +32,56 @@ vector_store = AstraDBVectorStore(
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
)


from astrapy.info import VectorServiceOptions

vector_store_vectorize = AstraDBVectorStore(
    collection_name="my_vectorize_store",
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
    collection_vector_service_options=VectorServiceOptions(
        provider="nvidia",
        model_name="NV-Embed-QA",
    ),
)


from astrapy.info import (
    CollectionLexicalOptions,
    CollectionRerankOptions,
    RerankServiceOptions,
    VectorServiceOptions,
)

vector_store_hybrid = AstraDBVectorStore(
    collection_name="my_hybrid_store",
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
    collection_vector_service_options=VectorServiceOptions(
        provider="nvidia",
        model_name="NV-Embed-QA",
    ),
    collection_lexical=CollectionLexicalOptions(analyzer="standard"),
    collection_rerank=CollectionRerankOptions(
        service=RerankServiceOptions(
            provider="nvidia",
            model_name="nvidia/llama-3.2-nv-rerankqa-1b-v2",
        ),
    ),
)
```

Notable features of class `AstraDBVectorStore`:

- native async API;
- metadata filtering in search;
- MMR (maximum marginal relevance) search;
- server-side embedding computation (["vectorize"](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html) in Astra DB parlance);
- auto-detection of its settings from an existing, pre-populated Astra DB collection;
- [hybrid search](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html#the-hybrid-search-process) (vector + BM25, followed by a rerank step);
- support for non-Astra Data API deployments (e.g. self-hosted [HCD](https://docs.datastax.com/en/hyper-converged-database/1.1/get-started/get-started-hcd.html)).

Learn more in the [example notebook](/docs/integrations/vectorstores/astradb).

-See the [example provided by DataStax](https://docs.datastax.com/en/astra/astra-db-vector/integrations/langchain.html).
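Whichever initialization pattern you pick, the resulting object follows the standard LangChain `VectorStore` interface. A minimal sketch (the document texts and queries are made up for illustration; the `filter` dict assumes Astra DB's metadata-filter syntax):

```python
from langchain_core.documents import Document

vector_store.add_documents([
    Document(page_content="Astra DB is built on Apache Cassandra.", metadata={"topic": "db"}),
    Document(page_content="LangChain defines a common vector store interface.", metadata={"topic": "lc"}),
])

# Metadata filtering and MMR are both available at query time.
hits = vector_store.similarity_search("What is Astra DB built on?", k=1, filter={"topic": "db"})
mmr_hits = vector_store.max_marginal_relevance_search("vector stores", k=2)
```
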
@@ -82,8 +130,6 @@ set_llm_cache(AstraDBSemanticCache(

Learn more in the [example notebook](/docs/integrations/llm_caching#astra-db-caches) (scroll to the appropriate section).

Learn more in the [example notebook](/docs/integrations/memory/astradb_chat_message_history).

## Document loader

```python

docs/docs/integrations/providers/brightdata.mdx (new file, 34 lines)
@@ -0,0 +1,34 @@
# Bright Data

[Bright Data](https://brightdata.com) is a web data platform that provides tools for web scraping, SERP collection, and accessing geo-restricted content.

Bright Data allows developers to extract structured data from websites, perform search engine queries, and access content that might otherwise be blocked or geo-restricted. The platform is designed to help overcome common web scraping challenges, including anti-bot systems, CAPTCHAs, and IP blocks.

## Installation and Setup

```bash
pip install langchain-brightdata
```

You'll need to set up your Bright Data API key:

```python
import os

os.environ["BRIGHT_DATA_API_KEY"] = "your-api-key"
```

Or you can pass it directly when initializing tools:

```python
from langchain_brightdata import BrightDataSERP

tool = BrightDataSERP(bright_data_api_key="your-api-key")
```

## Tools

The Bright Data integration provides several tools:

- [BrightDataSERP](/docs/integrations/tools/brightdata_serp) - Search engine results collection with geo-targeting
- [BrightDataUnlocker](/docs/integrations/tools/brightdata_unlocker) - Access any public website that might be geo-restricted or bot-protected
- [BrightDataWebScraperAPI](/docs/integrations/tools/brightdata-webscraperapi) - Extract structured data from 100+ popular domains, e.g. Amazon product details and LinkedIn profiles
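A short sketch of calling the SERP tool once the key is configured. The plain-string input and the query shown are assumptions for illustration; richer dict inputs may also be supported:

```python
from langchain_brightdata import BrightDataSERP

serp = BrightDataSERP()  # reads BRIGHT_DATA_API_KEY from the environment

# LangChain tools share a common `invoke` entry point.
results = serp.invoke("best CRM software 2025")
print(results)
```
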
@@ -32,7 +32,7 @@ For a detailed walkthrough of this wrapper, see [this notebook](/docs/integratio

You can also load this wrapper as a Tool to use with an Agent:

```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["dataforseo-api-search"])
```

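The same `load_tools` move, from `langchain.agents` to `langchain_community.agent_toolkits.load_tools`, recurs throughout this changeset. Whatever the import path, the returned objects are ordinary LangChain tools; a minimal sketch of exercising one directly (the query string is illustrative):

```python
from langchain_community.agent_toolkits.load_tools import load_tools

tools = load_tools(["dataforseo-api-search"])
tool = tools[0]
print(tool.name, "-", tool.description)

# Tools can be invoked standalone or handed to any agent runtime.
print(tool.invoke("latest langchain release"))
```
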
@@ -1,8 +1,8 @@

# Doctran

->[Doctran](https://github.com/psychic-api/doctran) is a python package. It uses LLMs and open-source
-> NLP libraries to transform raw text into clean, structured, information-dense documents
-> that are optimized for vector space retrieval. You can think of `Doctran` as a black box where
+>[Doctran](https://github.com/psychic-api/doctran) is a python package. It uses LLMs and open-source
+> NLP libraries to transform raw text into clean, structured, information-dense documents
+> that are optimized for vector space retrieval. You can think of `Doctran` as a black box where
> messy strings go in and nice, clean, labelled strings come out.

@@ -19,19 +19,19 @@ pip install doctran

See a [usage example for DoctranQATransformer](/docs/integrations/document_transformers/doctran_interrogate_document).

```python
-from langchain_community.document_loaders import DoctranQATransformer
+from langchain_community.document_transformers import DoctranQATransformer
```
### Property Extractor

See a [usage example for DoctranPropertyExtractor](/docs/integrations/document_transformers/doctran_extract_properties).

```python
-from langchain_community.document_loaders import DoctranPropertyExtractor
+from langchain_community.document_transformers import DoctranPropertyExtractor
```
### Document Translator

See a [usage example for DoctranTextTranslator](/docs/integrations/document_transformers/doctran_translate_document).

```python
-from langchain_community.document_loaders import DoctranTextTranslator
+from langchain_community.document_transformers import DoctranTextTranslator
```

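All three classes are document transformers, which is why the corrected import path matters. A minimal sketch of running the translator over a document; the `language` keyword is an assumption based on the transformer's purpose, and Doctran itself requires an OpenAI API key:

```python
from langchain_core.documents import Document
from langchain_community.document_transformers import DoctranTextTranslator

translator = DoctranTextTranslator(language="spanish")  # `language` is assumed
docs = [Document(page_content="Hello, how are you?")]
# `transform_documents` is the standard transformer entry point.
translated = translator.transform_documents(docs)
print(translated[0].page_content)
```
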
File diff suppressed because it is too large.
docs/docs/integrations/providers/featherless-ai.mdx (new file, 12 lines)
@@ -0,0 +1,12 @@
# Featherless AI

[Featherless AI](https://featherless.ai/) is a serverless AI inference platform that offers access to over 4,300 open-source models. Our goal is to make all AI models available for serverless inference. We provide inference via API to a continually expanding library of open-weight models.

## Installation and Setup

```bash
pip install langchain-featherless-ai
```

1. Sign up for an account at [Featherless](https://featherless.ai/register)
2. Subscribe to a plan and get your API key from [API Keys](https://featherless.ai/account/api-keys)
3. Set up your API key as an environment variable (`FEATHERLESSAI_API_KEY`), as in the sketch below
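A minimal sketch of step 3 in Python (the key value is a placeholder):

```python
import os

# Placeholder value; use the key from https://featherless.ai/account/api-keys
os.environ["FEATHERLESSAI_API_KEY"] = "your-api-key"
```
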

## Model catalog
Visit our model catalog for an overview of all our models: https://featherless.ai/models
@@ -1,6 +1,6 @@

# Friendli AI

-> [FriendliAI](https://friendli.ai/) enhances AI application performance and optimizes
+> [FriendliAI](https://friendli.ai/) enhances AI application performance and optimizes
> cost savings with scalable, efficient deployment options, tailored for high-demand AI workloads.

## Installation and setup

@@ -11,8 +11,8 @@ Install the `friendli-client` python package.
pip install -U langchain_community friendli-client
```

-Sign in to [Friendli Suite](https://suite.friendli.ai/) to create a Personal Access Token,
-and set it as the `FRIENDLI_TOKEN` environment variabzle.
+Sign in to [Friendli Suite](https://suite.friendli.ai/) to create a Personal Access Token,
+and set it as the `FRIENDLI_TOKEN` environment variable.


## Chat models

docs/docs/integrations/providers/gel.mdx (new file, 56 lines)
@@ -0,0 +1,56 @@
# Gel

[Gel](https://www.geldata.com/) is a powerful data platform built on top of PostgreSQL.

- Think in objects and graphs instead of tables and JOINs.
- Use the advanced Python SDK, integrated GUI, migrations engine, Auth and AI layers, and much more.
- Run locally, remotely, or in a [fully managed cloud](https://www.geldata.com/cloud).

## Installation

```bash
pip install langchain-gel
```

## Setup

1. Run `gel project init`.
2. Edit the schema. You need the following types to use the LangChain vectorstore:

```gel
using extension pgvector;

module default {
    scalar type EmbeddingVector extending ext::pgvector::vector<1536>;

    type Record {
        required collection: str;
        text: str;
        embedding: EmbeddingVector;
        external_id: str {
            constraint exclusive;
        };
        metadata: json;

        index ext::pgvector::hnsw_cosine(m := 16, ef_construction := 128)
            on (.embedding)
    }
}
```

> Note: this is the minimal setup. Feel free to add as many types, properties, and links as you want!
> Learn more about taking advantage of Gel's schema by reading the [docs](https://docs.geldata.com/learn/schema).

3. Run the migration: `gel migration create && gel migrate`.

## Usage

```python
from langchain_gel import GelVectorStore

vector_store = GelVectorStore(
    embeddings=embeddings,
)
```

See the full usage example [here](/docs/integrations/vectorstores/gel).
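The `embeddings` object above is left undefined in the snippet; any LangChain `Embeddings` implementation fits. A slightly fuller sketch, with an embedding model chosen for illustration (it is not prescribed by the Gel docs, beyond matching the 1536-dimension schema above):

```python
from langchain_gel import GelVectorStore
from langchain_openai import OpenAIEmbeddings  # illustrative choice; 1536 dims matches the schema

vector_store = GelVectorStore(
    embeddings=OpenAIEmbeddings(model="text-embedding-3-small"),
)

# Standard VectorStore interface from here on.
vector_store.add_texts(["Gel builds an object layer on top of PostgreSQL."])
print(vector_store.similarity_search("What is Gel built on?", k=1))
```
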
@@ -27,7 +27,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ

You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["golden-query"])
```

@@ -880,7 +880,7 @@ from langchain_community.tools import GoogleSearchRun, GoogleSearchResults

Agent Loading:
```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["google-search"])
```

@@ -1313,7 +1313,7 @@ from langchain_community.tools import GoogleSearchRun, GoogleSearchResults

Agent Loading:
```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["google-search"])
```

@@ -67,7 +67,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ

You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["google-serper"])
```

@@ -1,6 +1,6 @@

# Groq

->[Groq](https://groq.com)developed the world's first Language Processing Unit™, or `LPU`.
+>[Groq](https://groq.com) developed the world's first Language Processing Unit™, or `LPU`.
> The `Groq LPU` has a deterministic, single core streaming architecture that sets the standard
> for GenAI inference speed with predictable and repeatable performance for any given workload.
>

@@ -21,13 +21,3 @@ To import this vectorstore:

from langchain_milvus import Milvus
```

-## Retrievers
-
-See a [usage example](/docs/integrations/retrievers/milvus_hybrid_search).
-
-To import this vectorstore:
-```python
-from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever
-from langchain_milvus.utils.sparse import BM25SparseEmbedding
-```

@@ -37,8 +37,12 @@ You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:

```python
-from langchain.agents import load_tools
-tools = load_tools(["openweathermap-api"])
+import os
+from langchain_community.utilities import OpenWeatherMapAPIWrapper
+
+os.environ["OPENWEATHERMAP_API_KEY"] = ""
+weather = OpenWeatherMapAPIWrapper()
+tools = [weather.run]
```

For more information on tools, see [this page](/docs/how_to/tools_builtin).

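The wrapper can also be exercised directly, which makes for a quick smoke test. A minimal sketch (the location string is illustrative, and a valid `OPENWEATHERMAP_API_KEY` must be set):

```python
from langchain_community.utilities import OpenWeatherMapAPIWrapper

weather = OpenWeatherMapAPIWrapper()
# `run` takes a location string and returns a human-readable weather report.
print(weather.run("London,GB"))
```
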
@@ -29,11 +29,44 @@ For a more detailed walkthrough of the Pinecone vectorstore, see [this notebook]

### Sparse Vector store

LangChain's `PineconeSparseVectorStore` enables sparse retrieval using Pinecone's sparse English model. It maps text to sparse vectors and supports adding documents and similarity search.

```python
from langchain_pinecone import PineconeSparseVectorStore

# Initialize sparse vector store
vector_store = PineconeSparseVectorStore(
    index=my_index,
    embedding_model="pinecone-sparse-english-v0"
)
# Add documents
vector_store.add_documents(documents)
# Query
results = vector_store.similarity_search("your query", k=3)
```

-For a more detailed walkthrough of the Pinecone vectorstore, see [this notebook](/docs/integrations/vectorstores/pinecone_sparse)
+For a more detailed walkthrough, see the [Pinecone Sparse Vector Store notebook](/docs/integrations/vectorstores/pinecone_sparse).

### Sparse Embedding

LangChain's `PineconeSparseEmbeddings` provides sparse embedding generation using Pinecone's `pinecone-sparse-english-v0` model.

```python
from langchain_pinecone.embeddings import PineconeSparseEmbeddings

# Initialize sparse embeddings
sparse_embeddings = PineconeSparseEmbeddings(
    model="pinecone-sparse-english-v0"
)
# Embed a single query (returns SparseValues)
query_embedding = sparse_embeddings.embed_query("sample text")

# Embed multiple documents (returns list of SparseValues)
docs = ["Document 1 content", "Document 2 content"]
doc_embeddings = sparse_embeddings.embed_documents(docs)
```

For more detailed usage, see the [Pinecone Sparse Embeddings notebook](/docs/integrations/vectorstores/pinecone_sparse).

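A small sketch of inspecting what the embedder returns; the attribute names assume Pinecone's `SparseValues` shape (parallel lists of token indices and weights), and the sample string is illustrative:

```python
from langchain_pinecone.embeddings import PineconeSparseEmbeddings

sparse = PineconeSparseEmbeddings(model="pinecone-sparse-english-v0")
vec = sparse.embed_query("sparse retrieval with pinecone")
# SparseValues carry one weight per token index.
print(len(vec.indices), len(vec.values))
```
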
## Retrievers

@@ -1,37 +0,0 @@
-# Salute Devices
-
-Salute Devices provides GigaChat LLM's models.
-
-For more info how to get access to GigaChat [follow here](https://developers.sber.ru/docs/ru/gigachat/api/integration).
-
-## Installation and Setup
-
-GigaChat package can be installed via pip from PyPI:
-
-```bash
-pip install langchain-gigachat
-```
-
-## LLMs
-
-See a [usage example](/docs/integrations/llms/gigachat).
-
-```python
-from langchain_community.llms import GigaChat
-```
-
-## Chat models
-
-See a [usage example](/docs/integrations/chat/gigachat).
-
-```python
-from langchain_gigachat.chat_models import GigaChat
-```
-
-## Embeddings
-
-See a [usage example](/docs/integrations/text_embedding/gigachat).
-
-```python
-from langchain_gigachat.embeddings import GigaChatEmbeddings
-```
@@ -73,7 +73,7 @@ You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:

```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["searchapi"])
```

@@ -52,7 +52,7 @@ You can also load this wrapper as a Tool (to use with an Agent).
You can do this with:

```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["searx-search"],
                   searx_host="http://localhost:8888",
                   engines=["github"])

@@ -24,7 +24,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ

You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["serpapi"])
```

@@ -29,7 +29,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ

You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["stackexchange"])
```

@@ -315,6 +315,17 @@
    "Vectara offers an Intelligent Query Rewriting option which enhances search precision by automatically generating metadata filter expressions from natural language queries. This capability analyzes user queries, extracts relevant metadata filters, and rephrases the query to focus on the core information need. For more details [go to this notebook](../retrievers/self_query/vectara_self_query.ipynb)."
   ]
  },
+ {
+  "cell_type": "markdown",
+  "source": [
+   "## Vectara tools\n",
+   "Vectara provides several tools that can be used with LangChain. For more details [go to this notebook](../tools/vectara.ipynb)."
+  ],
+  "metadata": {
+   "collapsed": false
+  },
+  "id": "beadf6f485c1a69"
+ },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -32,7 +32,7 @@ For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integ

You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
-from langchain.agents import load_tools
+from langchain_community.agent_toolkits.load_tools import load_tools
tools = load_tools(["wolfram-alpha"])
```

@@ -365,7 +365,7 @@
   ],
   "source": [
    "from langchain.retrievers import ContextualCompressionRetriever\n",
-   "from langchain.retrievers.document_compressors import FlashrankRerank\n",
+   "from langchain_community.document_compressors import FlashrankRerank\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "llm = ChatOpenAI(temperature=0)\n",
@@ -1,639 +0,0 @@
(Deleted notebook; its cell contents follow.)

---
sidebar_label: Milvus Hybrid Search
---

# Milvus Hybrid Search Retriever

> [Milvus](https://milvus.io/docs) is an open-source vector database built to power embedding similarity search and AI applications. Milvus makes unstructured data search more accessible, and provides a consistent user experience regardless of the deployment environment.

This will help you get started with the Milvus Hybrid Search [retriever](/docs/concepts/retrievers), which combines the strengths of both dense and sparse vector search. For detailed documentation of all `MilvusCollectionHybridSearchRetriever` features and configurations head to the [API reference](https://python.langchain.com/api_reference/milvus/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html).

See also the Milvus Multi-Vector Search [docs](https://milvus.io/docs/multi-vector-search.md).

### Integration details

import {ItemTable} from "@theme/FeatureTables";

<ItemTable category="document_retrievers" item="MilvusCollectionHybridSearchRetriever" />

## Setup

If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:

```python
# os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter your LangSmith API key: ")
# os.environ["LANGSMITH_TRACING"] = "true"
```

### Installation

This retriever lives in the `langchain-milvus` package. This guide requires the following dependencies:

```python
%pip install --upgrade --quiet pymilvus[model] langchain-milvus langchain-openai
```

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever
from langchain_milvus.utils.sparse import BM25SparseEmbedding
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from pymilvus import (
    Collection,
    CollectionSchema,
    DataType,
    FieldSchema,
    WeightedRanker,
    connections,
)
```

### Start the Milvus service

Please refer to the [Milvus documentation](https://milvus.io/docs/install_standalone-docker.md) to start the Milvus service.

After starting milvus, you need to specify your milvus connection URI.

```python
CONNECTION_URI = "http://localhost:19530"
```

### Prepare OpenAI API Key

Please refer to the [OpenAI documentation](https://platform.openai.com/account/api-keys) to obtain your OpenAI API key, and set it as an environment variable.

```shell
export OPENAI_API_KEY=<your_api_key>
```

### Prepare dense and sparse embedding functions

Let us fictionalize 10 fake descriptions of novels. In actual production, it may be a large amount of text data.

```python
texts = [
    "In 'The Whispering Walls' by Ava Moreno, a young journalist named Sophia uncovers a decades-old conspiracy hidden within the crumbling walls of an ancient mansion, where the whispers of the past threaten to destroy her own sanity.",
    "In 'The Last Refuge' by Ethan Blackwood, a group of survivors must band together to escape a post-apocalyptic wasteland, where the last remnants of humanity cling to life in a desperate bid for survival.",
    "In 'The Memory Thief' by Lila Rose, a charismatic thief with the ability to steal and manipulate memories is hired by a mysterious client to pull off a daring heist, but soon finds themselves trapped in a web of deceit and betrayal.",
    "In 'The City of Echoes' by Julian Saint Clair, a brilliant detective must navigate a labyrinthine metropolis where time is currency, and the rich can live forever, but at a terrible cost to the poor.",
    "In 'The Starlight Serenade' by Ruby Flynn, a shy astronomer discovers a mysterious melody emanating from a distant star, which leads her on a journey to uncover the secrets of the universe and her own heart.",
    "In 'The Shadow Weaver' by Piper Redding, a young orphan discovers she has the ability to weave powerful illusions, but soon finds herself at the center of a deadly game of cat and mouse between rival factions vying for control of the mystical arts.",
    "In 'The Lost Expedition' by Caspian Grey, a team of explorers ventures into the heart of the Amazon rainforest in search of a lost city, but soon finds themselves hunted by a ruthless treasure hunter and the treacherous jungle itself.",
    "In 'The Clockwork Kingdom' by Augusta Wynter, a brilliant inventor discovers a hidden world of clockwork machines and ancient magic, where a rebellion is brewing against the tyrannical ruler of the land.",
    "In 'The Phantom Pilgrim' by Rowan Welles, a charismatic smuggler is hired by a mysterious organization to transport a valuable artifact across a war-torn continent, but soon finds themselves pursued by deadly assassins and rival factions.",
    "In 'The Dreamwalker's Journey' by Lyra Snow, a young dreamwalker discovers she has the ability to enter people's dreams, but soon finds herself trapped in a surreal world of nightmares and illusions, where the boundaries between reality and fantasy blur.",
]
```

We will use the [OpenAI Embedding](https://platform.openai.com/docs/guides/embeddings) to generate dense vectors, and the [BM25 algorithm](https://en.wikipedia.org/wiki/Okapi_BM25) to generate sparse vectors.

Initialize dense embedding function and get dimension

```python
dense_embedding_func = OpenAIEmbeddings()
dense_dim = len(dense_embedding_func.embed_query(texts[1]))
dense_dim
```
Output: `1536`

Initialize sparse embedding function.

Note that the output of sparse embedding is a set of sparse vectors, which represents the index and weight of the keywords of the input text.

```python
sparse_embedding_func = BM25SparseEmbedding(corpus=texts)
sparse_embedding_func.embed_query(texts[1])
```
Output: `{0: 0.4270424944042204, 21: 1.845826690498331, 22: 1.845826690498331, 23: 1.845826690498331, 24: 1.845826690498331, 25: 1.845826690498331, 26: 1.845826690498331, 27: 1.2237754316221157, 28: 1.845826690498331, 29: 1.845826690498331, 30: 1.845826690498331, 31: 1.845826690498331, 32: 1.845826690498331, 33: 1.845826690498331, 34: 1.845826690498331, 35: 1.845826690498331, 36: 1.845826690498331, 37: 1.845826690498331, 38: 1.845826690498331, 39: 1.845826690498331}`

### Create Milvus Collection and load data

Initialize connection URI and establish connection

```python
connections.connect(uri=CONNECTION_URI)
```

Define field names and their data types

```python
pk_field = "doc_id"
dense_field = "dense_vector"
sparse_field = "sparse_vector"
text_field = "text"
fields = [
    FieldSchema(
        name=pk_field,
        dtype=DataType.VARCHAR,
        is_primary=True,
        auto_id=True,
        max_length=100,
    ),
    FieldSchema(name=dense_field, dtype=DataType.FLOAT_VECTOR, dim=dense_dim),
    FieldSchema(name=sparse_field, dtype=DataType.SPARSE_FLOAT_VECTOR),
    FieldSchema(name=text_field, dtype=DataType.VARCHAR, max_length=65_535),
]
```

Create a collection with the defined schema

```python
schema = CollectionSchema(fields=fields, enable_dynamic_field=False)
collection = Collection(
    name="IntroductionToTheNovels", schema=schema, consistency_level="Strong"
)
```

Define index for dense and sparse vectors

```python
dense_index = {"index_type": "FLAT", "metric_type": "IP"}
collection.create_index("dense_vector", dense_index)
sparse_index = {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP"}
collection.create_index("sparse_vector", sparse_index)
collection.flush()
```

Insert entities into the collection and load the collection

```python
entities = []
for text in texts:
    entity = {
        dense_field: dense_embedding_func.embed_documents([text])[0],
        sparse_field: sparse_embedding_func.embed_documents([text])[0],
        text_field: text,
    }
    entities.append(entity)
collection.insert(entities)
collection.load()
```

## Instantiation

Now we can instantiate our retriever, defining search parameters for sparse and dense fields:

```python
sparse_search_params = {"metric_type": "IP"}
dense_search_params = {"metric_type": "IP", "params": {}}
retriever = MilvusCollectionHybridSearchRetriever(
    collection=collection,
    rerank=WeightedRanker(0.5, 0.5),
    anns_fields=[dense_field, sparse_field],
    field_embeddings=[dense_embedding_func, sparse_embedding_func],
    field_search_params=[dense_search_params, sparse_search_params],
    top_k=3,
    text_field=text_field,
)
```

In the input parameters of this Retriever, we use a dense embedding and a sparse embedding to perform hybrid search on the two fields of this Collection, and use WeightedRanker for reranking. Finally, 3 top-K Documents will be returned.

## Usage

```python
retriever.invoke("What are the story about ventures?")
```
Output:
```
[Document(page_content="In 'The Lost Expedition' by Caspian Grey, a team of explorers ventures into the heart of the Amazon rainforest in search of a lost city, but soon finds themselves hunted by a ruthless treasure hunter and the treacherous jungle itself.", metadata={'doc_id': '449281835035545843'}),
 Document(page_content="In 'The Phantom Pilgrim' by Rowan Welles, a charismatic smuggler is hired by a mysterious organization to transport a valuable artifact across a war-torn continent, but soon finds themselves pursued by deadly assassins and rival factions.", metadata={'doc_id': '449281835035545845'}),
 Document(page_content="In 'The Dreamwalker's Journey' by Lyra Snow, a young dreamwalker discovers she has the ability to enter people's dreams, but soon finds herself trapped in a surreal world of nightmares and illusions, where the boundaries between reality and fantasy blur.", metadata={'doc_id': '449281835035545846'})]
```

## Use within a chain

Initialize ChatOpenAI and define a prompt template

```python
llm = ChatOpenAI()

PROMPT_TEMPLATE = """
Human: You are an AI assistant, and provides answers to questions by using fact based and statistical information when possible.
Use the following pieces of information to provide a concise answer to the question enclosed in <question> tags.

<context>
{context}
</context>

<question>
{question}
</question>

Assistant:"""

prompt = PromptTemplate(
    template=PROMPT_TEMPLATE, input_variables=["context", "question"]
)
```

Define a function for formatting documents

```python
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
```

Define a chain using the retriever and other components

```python
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
```

Perform a query using the defined chain

```python
rag_chain.invoke("What novels has Lila written and what are their contents?")
```
Output: `"Lila Rose has written 'The Memory Thief,' which follows a charismatic thief with the ability to steal and manipulate memories as they navigate a daring heist and a web of deceit and betrayal."`

Drop the collection

```python
collection.drop()
```

## API reference

For detailed documentation of all `MilvusCollectionHybridSearchRetriever` features and configurations head to the [API reference](https://python.langchain.com/api_reference/milvus/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html).
docs/docs/integrations/retrievers/pinecone_rerank.ipynb (new file, 322 lines)
@@ -0,0 +1,322 @@
(New notebook; its cell contents follow.)

# Pinecone Rerank

> This notebook shows how to use **PineconeRerank** for two-stage vector retrieval reranking using Pinecone's hosted reranking API as demonstrated in `langchain_pinecone/libs/pinecone/rerank.py`.

## Setup
Install the `langchain-pinecone` package.

```python
%pip install -qU "langchain-pinecone"
```

## Credentials
Set your Pinecone API key to use the reranking API.

```python
import os
from getpass import getpass

os.environ["PINECONE_API_KEY"] = os.getenv("PINECONE_API_KEY") or getpass(
    "Enter your Pinecone API key: "
)
```

## Instantiation
Use `PineconeRerank` to rerank a list of documents by relevance to a query.

```python
from langchain_core.documents import Document
from langchain_pinecone import PineconeRerank

# Initialize reranker
reranker = PineconeRerank(model="bge-reranker-v2-m3")

# Sample documents
documents = [
    Document(page_content="Paris is the capital of France."),
    Document(page_content="Berlin is the capital of Germany."),
    Document(page_content="The Eiffel Tower is in Paris."),
]

# Rerank documents
query = "What is the capital of France?"
reranked_docs = reranker.compress_documents(documents, query)

# Print results
for doc in reranked_docs:
    score = doc.metadata.get("relevance_score")
    print(f"Score: {score:.4f} | Content: {doc.page_content}")
```
Output:
```
Score: 0.9998 | Content: Paris is the capital of France.
Score: 0.1950 | Content: The Eiffel Tower is in Paris.
Score: 0.0042 | Content: Berlin is the capital of Germany.
```

## Usage
### Reranking with Top-N
Specify `top_n` to limit the number of returned documents.

```python
# Return only top-1 result
reranker_top1 = PineconeRerank(model="bge-reranker-v2-m3", top_n=1)
top1_docs = reranker_top1.compress_documents(documents, query)
print("Top-1 Result:")
for doc in top1_docs:
    print(f"Score: {doc.metadata['relevance_score']:.4f} | Content: {doc.page_content}")
```
Output:
```
Top-1 Result:
Score: 0.9998 | Content: Paris is the capital of France.
```

## Reranking with Custom Rank Fields
If your documents are dictionaries or have custom fields, use `rank_fields` to specify the field to rank on.

```python
# Sample dictionary documents with 'text' field
docs_dict = [
    {
        "id": "doc1",
        "text": "Article about renewable energy.",
        "title": "Renewable Energy",
    },
    {"id": "doc2", "text": "Report on economic growth.", "title": "Economic Growth"},
    {
        "id": "doc3",
        "text": "News on climate policy changes.",
        "title": "Climate Policy",
    },
]

# Initialize reranker with rank_fields
reranker_text = PineconeRerank(model="bge-reranker-v2-m3", rank_fields=["text"])
climate_docs = reranker_text.rerank(docs_dict, "Latest news on climate change.")

# Show IDs and scores
for res in climate_docs:
    print(f"ID: {res['id']} | Score: {res['score']:.4f}")
```
Output:
```
ID: doc3 | Score: 0.9892
ID: doc1 | Score: 0.0006
ID: doc2 | Score: 0.0000
```

We can also rerank based on the `title` field:

```python
economic_docs = reranker_text.rerank(docs_dict, "Economic forecast.")

# Show IDs and scores
for res in economic_docs:
    print(
        f"ID: {res['id']} | Score: {res['score']:.4f} | Title: {res['document']['title']}"
    )
```
Output:
```
ID: doc2 | Score: 0.8918 | Title: Economic Growth
ID: doc3 | Score: 0.0002 | Title: Climate Policy
ID: doc1 | Score: 0.0000 | Title: Renewable Energy
```

## Reranking with Additional Parameters
You can pass model-specific parameters (e.g., `truncate`) directly to `.rerank()`.

The `truncate` parameter controls how to handle inputs longer than those supported by the model. Accepted values: `END` or `NONE`.
`END` truncates the input sequence at the input token limit; `NONE` returns an error when the input exceeds the input token limit.

```python
# Rerank with custom truncate parameter
docs_simple = [
    {"id": "docA", "text": "Quantum entanglement is a physical phenomenon..."},
    {"id": "docB", "text": "Classical mechanics describes motion..."},
]

reranked = reranker.rerank(
    documents=docs_simple,
    query="Explain the concept of quantum entanglement.",
    truncate="END",
)
# Print reranked IDs and scores
for res in reranked:
    print(f"ID: {res['id']} | Score: {res['score']:.4f}")
```
Output:
```
ID: docA | Score: 0.6950
ID: docB | Score: 0.0001
```

## Use within a chain

## API reference
- `PineconeRerank(model, top_n, rank_fields, return_documents)`
- `.rerank(documents, query, rank_fields=None, model=None, top_n=None, truncate="END")`
- `.compress_documents(documents, query)` (returns `Document` objects with `relevance_score` in metadata)
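The notebook's "Use within a chain" cell is left empty. A typical pattern, sketched here as an assumption rather than taken from the diff, is to wrap the reranker in a `ContextualCompressionRetriever`; `base_retriever` stands in for whatever retriever you already use:

```python
from langchain.retrievers import ContextualCompressionRetriever
from langchain_pinecone import PineconeRerank

# `base_retriever` is assumed to exist, e.g. some_vector_store.as_retriever().
reranker = PineconeRerank(model="bge-reranker-v2-m3", top_n=3)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=reranker,
    base_retriever=base_retriever,
)
docs = compression_retriever.invoke("What is the capital of France?")
```
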
@@ -6,7 +6,9 @@
   "source": [
    "# Astra DB\n",
    "\n",
-   ">[DataStax Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on `Cassandra` and made conveniently available through an easy-to-use JSON API.\n",
+   "> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
+   "> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
+   "> through an easy-to-use JSON API.\n",
    "\n",
    "In the walkthrough, we'll demo the `SelfQueryRetriever` with an `Astra DB` vector store."
   ]

@@ -23,7 +23,9 @@
    "\n",
    "## Overview\n",
    "\n",
-   "DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API.\n",
+   "> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
+   "> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
+   "> through an easy-to-use JSON API.\n",
    "\n",
    "### Integration details\n",

@@ -12,24 +12,36 @@
   "source": [
    "\n",
    ">[Cloudflare Workers AI](https://developers.cloudflare.com/workers-ai/) allows you to run machine learning models, on the `Cloudflare` network, from your code via REST API.\n",
    "\n",
-   ">[Cloudflare AI document](https://developers.cloudflare.com/workers-ai/models/text-embeddings/) listed all text embeddings models available.\n",
+   ">[Workers AI Developer Docs](https://developers.cloudflare.com/workers-ai/models/text-embeddings/) lists all text embeddings models available.\n",
    "\n",
    "## Setting up\n",
    "\n",
-   "Both Cloudflare account ID and API token are required. Find how to obtain them from [this document](https://developers.cloudflare.com/workers-ai/get-started/rest-api/).\n"
+   "Both a Cloudflare Account ID and Workers AI API token are required. Find how to obtain them from [this document](https://developers.cloudflare.com/workers-ai/get-started/rest-api/).\n",
+   "\n",
+   "You can pass these parameters explicitly or define as environmental variables.\n"
   ]
  },
  {
   "cell_type": "code",
-  "execution_count": 2,
+  "execution_count": 11,
   "id": "f60023b8",
-  "metadata": {},
+  "metadata": {
+   "ExecuteTime": {
+    "end_time": "2025-05-13T06:00:30.121204Z",
+    "start_time": "2025-05-13T06:00:30.117936Z"
+   }
+  },
   "outputs": [],
   "source": [
-   "import getpass\n",
    "import os\n",
    "\n",
-   "my_account_id = getpass.getpass(\"Enter your Cloudflare account ID:\\n\\n\")\n",
-   "my_api_token = getpass.getpass(\"Enter your Cloudflare API token:\\n\\n\")"
+   "from dotenv import load_dotenv\n",
+   "\n",
+   "load_dotenv(\".env\")\n",
+   "\n",
+   "cf_acct_id = os.getenv(\"CF_ACCOUNT_ID\")\n",
+   "\n",
+   "cf_ai_token = os.getenv(\"CF_AI_API_TOKEN\")"
   ]
  },
  {
@@ -42,9 +54,14 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 1,
+  "execution_count": 12,
   "id": "92c5b61e",
-  "metadata": {},
+  "metadata": {
+   "ExecuteTime": {
+    "end_time": "2025-05-13T06:00:31.224996Z",
+    "start_time": "2025-05-13T06:00:31.222981Z"
+   }
+  },
   "outputs": [],
   "source": [
    "from langchain_cloudflare.embeddings import (\n",
@@ -54,25 +71,28 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 3,
+  "execution_count": 13,
   "id": "062547b9",
-  "metadata": {},
+  "metadata": {
+   "ExecuteTime": {
+    "end_time": "2025-05-13T06:00:32.515031Z",
+    "start_time": "2025-05-13T06:00:31.798590Z"
+   }
+  },
   "outputs": [
    {
     "data": {
-     "text/plain": [
-      "(384, [-0.033627357333898544, 0.03982774540781975, 0.03559349477291107])"
-     ]
+     "text/plain": "(384, [-0.033660888671875, 0.039764404296875, 0.03558349609375])"
    },
-    "execution_count": 3,
+    "execution_count": 13,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
  "source": [
   "embeddings = CloudflareWorkersAIEmbeddings(\n",
-  "    account_id=my_account_id,\n",
-  "    api_token=my_api_token,\n",
+  "    account_id=cf_acct_id,\n",
+  "    api_token=cf_ai_token,\n",
   "    model_name=\"@cf/baai/bge-small-en-v1.5\",\n",
   ")\n",
   "# single string embeddings\n",
@@ -82,17 +102,20 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 4,
+  "execution_count": 14,
   "id": "e1dcc4bd",
-  "metadata": {},
+  "metadata": {
+   "ExecuteTime": {
+    "end_time": "2025-05-13T06:00:33.106160Z",
+    "start_time": "2025-05-13T06:00:32.847232Z"
+   }
+  },
  "outputs": [
   {
    "data": {
-    "text/plain": [
-     "(3, 384)"
-    ]
+    "text/plain": "(3, 384)"
   },
-   "execution_count": 4,
+   "execution_count": 14,
   "metadata": {},
   "output_type": "execute_result"
  }
@@ -102,14 +125,6 @@
  "batch_query_result = embeddings.embed_documents([\"test1\", \"test2\", \"test3\"])\n",
  "len(batch_query_result), len(batch_query_result[0])"
 ]
},
-{
- "cell_type": "code",
- "execution_count": null,
- "id": "52de8b88",
- "metadata": {},
- "outputs": [],
- "source": []
-}
],
"metadata": {

@@ -1,120 +0,0 @@
(Deleted notebook; its cell contents follow.)

# GigaChat
This notebook shows how to use LangChain with [GigaChat embeddings](https://developers.sber.ru/portal/products/gigachat).
To use you need to install ```gigachat``` python package.

```python
%pip install --upgrade --quiet langchain-gigachat
```

To get GigaChat credentials you need to [create account](https://developers.sber.ru/studio/login) and [get access to API](https://developers.sber.ru/docs/ru/gigachat/individuals-quickstart)

## Example

```python
import os
from getpass import getpass

if "GIGACHAT_CREDENTIALS" not in os.environ:
    os.environ["GIGACHAT_CREDENTIALS"] = getpass()
```

```python
from langchain_gigachat import GigaChatEmbeddings

embeddings = GigaChatEmbeddings(verify_ssl_certs=False, scope="GIGACHAT_API_PERS")
```

```python
query_result = embeddings.embed_query("The quick brown fox jumps over the lazy dog")
```

```python
query_result[:5]
```
Output: `[0.8398333191871643, -0.14180311560630798, -0.6161925792694092, -0.17103666067123413, 1.2884578704833984]`
292
docs/docs/integrations/tools/brightdata-webscraperapi.ipynb
Normal file
@@ -0,0 +1,292 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BrightDataWebScraperAPI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[Bright Data](https://brightdata.com/) provides a powerful Web Scraper API that allows you to extract structured data from 100+ ppular domains, including Amazon product details, LinkedIn profiles, and more, making it particularly useful for AI agents requiring reliable structured web data feeds."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Overview"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"|Class|Package|Serializable|JS support|Package latest|\n",
|
||||
"|:--|:--|:-:|:-:|:-:|\n",
|
||||
"|[BrightDataWebScraperAPI](https://pypi.org/project/langchain-brightdata/)|[langchain-brightdata](https://pypi.org/project/langchain-brightdata/)|✅|❌||\n",
|
||||
"\n",
|
||||
"### Tool features\n",
|
||||
"\n",
|
||||
"|Native async|Returns artifact|Return data|Pricing|\n",
|
||||
"|:-:|:-:|:--|:-:|\n",
|
||||
"|❌|❌|Structured data from websites (Amazon products, LinkedIn profiles, etc.)|Requires Bright Data account|\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-brightdata` package.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install langchain-brightdata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You'll need a Bright Data API key to use this tool. You can set it as an environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"BRIGHT_DATA_API_KEY\"] = \"your-api-key\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Or pass it directly when initializing the tool:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataWebScraperAPI\n",
|
||||
"\n",
|
||||
"scraper_tool = BrightDataWebScraperAPI(bright_data_api_key=\"your-api-key\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Here we show how to instantiate an instance of the BrightDataWebScraperAPI tool. This tool allows you to extract structured data from various websites including Amazon product details, LinkedIn profiles, and more using Bright Data's Dataset API.\n",
|
||||
"\n",
|
||||
"The tool accepts various parameters during instantiation:\n",
|
||||
"\n",
|
||||
"- `bright_data_api_key` (required, str): Your Bright Data API key for authentication.\n",
|
||||
"- `dataset_mapping` (optional, Dict[str, str]): A dictionary mapping dataset types to their corresponding Bright Data dataset IDs. The default mapping includes:\n",
|
||||
" - \"amazon_product\": \"gd_l7q7dkf244hwjntr0\"\n",
|
||||
" - \"amazon_product_reviews\": \"gd_le8e811kzy4ggddlq\"\n",
|
||||
" - \"linkedin_person_profile\": \"gd_l1viktl72bvl7bjuj0\"\n",
|
||||
" - \"linkedin_company_profile\": \"gd_l1vikfnt1wgvvqz95w\""
|
||||
]
|
||||
},
|
||||
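As referenced in the parameter list above, here is a minimal sketch of instantiating the tool with a custom `dataset_mapping`. The `"custom_type"` key and the `gd_custom_dataset_id` value are hypothetical placeholders, not real Bright Data dataset IDs.

```python
from langchain_brightdata import BrightDataWebScraperAPI

# A minimal sketch: override the default dataset mapping.
# "custom_type" / "gd_custom_dataset_id" are hypothetical placeholders,
# not real Bright Data dataset IDs.
scraper_tool = BrightDataWebScraperAPI(
    bright_data_api_key="your-api-key",
    dataset_mapping={
        "amazon_product": "gd_l7q7dkf244hwjntr0",  # default ID from the list above
        "custom_type": "gd_custom_dataset_id",  # hypothetical custom entry
    },
)
```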
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation\n",
|
||||
"\n",
|
||||
"### Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataWebScraperAPI\n",
|
||||
"\n",
|
||||
"# Initialize the tool\n",
|
||||
"scraper_tool = BrightDataWebScraperAPI(\n",
|
||||
" bright_data_api_key=\"your-api-key\" # Optional if set in environment variables\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Extract Amazon product data\n",
|
||||
"results = scraper_tool.invoke(\n",
|
||||
" {\"url\": \"https://www.amazon.com/dp/B08L5TNJHG\", \"dataset_type\": \"amazon_product\"}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Advanced Usage with Parameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataWebScraperAPI\n",
|
||||
"\n",
|
||||
"# Initialize with default parameters\n",
|
||||
"scraper_tool = BrightDataWebScraperAPI(bright_data_api_key=\"your-api-key\")\n",
|
||||
"\n",
|
||||
"# Extract Amazon product data with location-specific pricing\n",
|
||||
"results = scraper_tool.invoke(\n",
|
||||
" {\n",
|
||||
" \"url\": \"https://www.amazon.com/dp/B08L5TNJHG\",\n",
|
||||
" \"dataset_type\": \"amazon_product\",\n",
|
||||
" \"zipcode\": \"10001\", # Get pricing for New York City\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(results)\n",
|
||||
"\n",
|
||||
"# Extract LinkedIn profile data\n",
|
||||
"linkedin_results = scraper_tool.invoke(\n",
|
||||
" {\n",
|
||||
" \"url\": \"https://www.linkedin.com/in/satyanadella/\",\n",
|
||||
" \"dataset_type\": \"linkedin_person_profile\",\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(linkedin_results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customization Options\n",
|
||||
"\n",
|
||||
"The BrightDataWebScraperAPI tool accepts several parameters for customization:\n",
|
||||
"\n",
|
||||
"|Parameter|Type|Description|\n",
|
||||
"|:--|:--|:--|\n",
|
||||
"|`url`|str|The URL to extract data from|\n",
|
||||
"|`dataset_type`|str|Type of dataset to use (e.g., \"amazon_product\")|\n",
|
||||
"|`zipcode`|str|Optional zipcode for location-specific data|\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Available Dataset Types\n",
|
||||
"\n",
|
||||
"The tool supports the following dataset types for structured data extraction:\n",
|
||||
"\n",
|
||||
"|Dataset Type|Description|\n",
|
||||
"|:--|:--|\n",
|
||||
"|`amazon_product`|Extract detailed Amazon product data|\n",
|
||||
"|`amazon_product_reviews`|Extract Amazon product reviews|\n",
|
||||
"|`linkedin_person_profile`|Extract LinkedIn person profile data|\n",
|
||||
"|`linkedin_company_profile`|Extract LinkedIn company profile data|\n"
|
||||
]
|
||||
},
|
||||
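As noted beneath the table above, a short sketch invoking the `amazon_product_reviews` dataset type. It assumes this dataset accepts a product URL the same way the `amazon_product` examples earlier do; treat that as an assumption rather than confirmed API behavior.

```python
from langchain_brightdata import BrightDataWebScraperAPI

scraper_tool = BrightDataWebScraperAPI(bright_data_api_key="your-api-key")

# Assumption: the reviews dataset takes a product URL, mirroring the
# amazon_product examples above.
reviews = scraper_tool.invoke(
    {
        "url": "https://www.amazon.com/dp/B08L5TNJHG",
        "dataset_type": "amazon_product_reviews",
    }
)
print(reviews)
```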
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataWebScraperAPI\n",
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"# Initialize the LLM\n",
|
||||
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\", google_api_key=\"your-api-key\")\n",
|
||||
"\n",
|
||||
"# Initialize the Bright Data Web Scraper API tool\n",
|
||||
"scraper_tool = BrightDataWebScraperAPI(bright_data_api_key=\"your-api-key\")\n",
|
||||
"\n",
|
||||
"# Create the agent with the tool\n",
|
||||
"agent = create_react_agent(llm, [scraper_tool])\n",
|
||||
"\n",
|
||||
"# Provide a user query\n",
|
||||
"user_input = \"Scrape Amazon product data for https://www.amazon.com/dp/B0D2Q9397Y?th=1 in New York (zipcode 10001).\"\n",
|
||||
"\n",
|
||||
"# Stream the agent's step-by-step output\n",
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": user_input},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"- [Bright Data API Documentation](https://docs.brightdata.com/scraping-automation/web-scraper-api/overview)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
294
docs/docs/integrations/tools/brightdata_serp.ipynb
Normal file
@@ -0,0 +1,294 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a6f91f20",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BrightDataSERP\n",
|
||||
"\n",
|
||||
"[Bright Data](https://brightdata.com/) provides a powerful SERP API that allows you to query search engines (Google,Bing.DuckDuckGo,Yandex) with geo-targeting and advanced customization options, particularly useful for AI agents requiring real-time web information.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"|Class|Package|Serializable|JS support|Package latest|\n",
|
||||
"|:--|:--|:-:|:-:|:-:|\n",
|
||||
"|[BrightDataSERP](https://pypi.org/project/langchain-brightdata/)|[langchain-brightdata](https://pypi.org/project/langchain-brightdata/)|✅|❌||\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Tool features\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"|Native async|Returns artifact|Return data|Pricing|\n",
|
||||
"|:-:|:-:|:--|:-:|\n",
|
||||
"|❌|❌|Title, URL, snippet, position, and other search result data|Requires Bright Data account|\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-brightdata` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "f85b4089",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"pip install langchain-brightdata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b15e9266",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"You'll need a Bright Data API key to use this tool. You can set it as an environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "e0b178a2-8816-40ca-b57c-ccdd86dde9c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"BRIGHT_DATA_API_KEY\"] = \"your-api-key\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bc5ab717-fd27-4c59-b912-bdd099541478",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Or pass it directly when initializing the tool:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a6c2f136-6367-4f1f-825d-ae741e1bf281",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataSERP\n",
|
||||
"\n",
|
||||
"serp_tool = BrightDataSERP(bright_data_api_key=\"your-api-key\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eed8cfcc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Here we show how to instantiate an instance of the BrightDataSERP tool. This tool allows you to perform search engine queries with various customization options including geo-targeting, language preferences, device type simulation, and specific search types using Bright Data's SERP API.\n",
|
||||
"\n",
|
||||
"The tool accepts various parameters during instantiation:\n",
|
||||
"\n",
|
||||
"- `bright_data_api_key` (required, str): Your Bright Data API key for authentication.\n",
|
||||
"- `search_engine` (optional, str): Search engine to use for queries. Default is \"google\". Other options include \"bing\", \"yahoo\", \"yandex\", \"DuckDuckGo\" etc.\n",
|
||||
"- `country` (optional, str): Two-letter country code for localized search results (e.g., \"us\", \"gb\", \"de\", \"jp\"). Default is \"us\".\n",
|
||||
"- `language` (optional, str): Two-letter language code for the search results (e.g., \"en\", \"es\", \"fr\", \"de\"). Default is \"en\".\n",
|
||||
"- `results_count` (optional, int): Number of search results to return. Default is 10. Maximum value is typically 100.\n",
|
||||
"- `search_type` (optional, str): Type of search to perform. Options include:\n",
|
||||
" - None (default): Regular web search\n",
|
||||
" - \"isch\": Images search\n",
|
||||
" - \"shop\": Shopping search\n",
|
||||
" - \"nws\": News search\n",
|
||||
" - \"jobs\": Jobs search\n",
|
||||
"- `device_type` (optional, str): Device type to simulate for the search. Options include:\n",
|
||||
" - None (default): Desktop device\n",
|
||||
" - \"mobile\": Generic mobile device\n",
|
||||
" - \"ios\": iOS device (iPhone)\n",
|
||||
" - \"android\": Android device\n",
|
||||
"- `parse_results` (optional, bool): Whether to return parsed JSON results. Default is False, which returns raw HTML response."
|
||||
]
|
||||
},
|
||||
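As referenced in the parameter list above, a short sketch combining several of these instantiation options for a geo-targeted news search; it only uses parameters named in that list.

```python
from langchain_brightdata import BrightDataSERP

# A geo-targeted news search: UK results, in English, parsed to JSON.
serp_tool = BrightDataSERP(
    bright_data_api_key="your-api-key",
    country="gb",
    language="en",
    search_type="nws",  # news search, per the options above
    parse_results=True,  # structured JSON instead of raw HTML
)

results = serp_tool.invoke("UK electric vehicle incentives")
print(results)
```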
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "902dc1fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataSERP\n",
|
||||
"\n",
|
||||
"# Initialize the tool\n",
|
||||
"serp_tool = BrightDataSERP(\n",
|
||||
" bright_data_api_key=\"your-api-key\" # Optional if set in environment variables\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Run a basic search\n",
|
||||
"results = serp_tool.invoke(\"latest AI research papers\")\n",
|
||||
"\n",
|
||||
"print(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "74147a1a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Advanced Usage with Parameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "65310a8b-eb0c-4d9e-a618-4f4abe2414fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataSERP\n",
|
||||
"\n",
|
||||
"# Initialize with default parameters\n",
|
||||
"serp_tool = BrightDataSERP(\n",
|
||||
" bright_data_api_key=\"your-api-key\",\n",
|
||||
" search_engine=\"google\", # Default\n",
|
||||
" country=\"us\", # Default\n",
|
||||
" language=\"en\", # Default\n",
|
||||
" results_count=10, # Default\n",
|
||||
" parse_results=True, # Get structured JSON results\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use with specific parameters for this search\n",
|
||||
"results = serp_tool.invoke(\n",
|
||||
" {\n",
|
||||
" \"query\": \"best electric vehicles\",\n",
|
||||
" \"country\": \"de\", # Get results as if searching from Germany\n",
|
||||
" \"language\": \"de\", # Get results in German\n",
|
||||
" \"search_type\": \"shop\", # Get shopping results\n",
|
||||
" \"device_type\": \"mobile\", # Simulate a mobile device\n",
|
||||
" \"results_count\": 15,\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d6e73897",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customization Options\n",
|
||||
"\n",
|
||||
"The BrightDataSERP tool accepts several parameters for customization:\n",
|
||||
"\n",
|
||||
"|Parameter|Type|Description|\n",
|
||||
"|:--|:--|:--|\n",
|
||||
"|`query`|str|The search query to perform|\n",
|
||||
"|`search_engine`|str|Search engine to use (default: \"google\")|\n",
|
||||
"|`country`|str|Two-letter country code for localized results (default: \"us\")|\n",
|
||||
"|`language`|str|Two-letter language code (default: \"en\")|\n",
|
||||
"|`results_count`|int|Number of results to return (default: 10)|\n",
|
||||
"|`search_type`|str|Type of search: None (web), \"isch\" (images), \"shop\", \"nws\" (news), \"jobs\"|\n",
|
||||
"|`device_type`|str|Device type: None (desktop), \"mobile\", \"ios\", \"android\"|\n",
|
||||
"|`parse_results`|bool|Whether to return structured JSON (default: False)|\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3353ce6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8c91c32f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataSERP\n",
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"# Initialize the LLM\n",
|
||||
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\", google_api_key=\"your-api-key\")\n",
|
||||
"\n",
|
||||
"# Initialize the Bright Data SERP tool\n",
|
||||
"serp_tool = BrightDataSERP(\n",
|
||||
" bright_data_api_key=\"your-api-key\",\n",
|
||||
" search_engine=\"google\",\n",
|
||||
" country=\"us\",\n",
|
||||
" language=\"en\",\n",
|
||||
" results_count=10,\n",
|
||||
" parse_results=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Create the agent\n",
|
||||
"agent = create_react_agent(llm, [serp_tool])\n",
|
||||
"\n",
|
||||
"# Provide a user query\n",
|
||||
"user_input = \"Search for 'best electric vehicles' shopping results in Germany in German using mobile.\"\n",
|
||||
"\n",
|
||||
"# Stream the agent's output step-by-step\n",
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": user_input},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e8dec55a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"- [Bright Data API Documentation](https://docs.brightdata.com/scraping-automation/serp-api/introduction)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
314
docs/docs/integrations/tools/brightdata_unlocker.ipynb
Normal file
@@ -0,0 +1,314 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BrightDataUnlocker"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[Bright Data](https://brightdata.com/) provides a powerful Web Unlocker API that allows you to access websites that might be protected by anti-bot measures, geo-restrictions, or other access limitations, making it particularly useful for AI agents requiring reliable web content extraction."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Overview"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Integration details"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"|Class|Package|Serializable|JS support|Package latest|\n",
|
||||
"|:--|:--|:-:|:-:|:-:|\n",
|
||||
"|[BrightDataUnlocker](https://pypi.org/project/langchain-brightdata/)|[langchain-brightdata](https://pypi.org/project/langchain-brightdata/)|✅|❌||\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Tool features"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"|Native async|Returns artifact|Return data|Pricing|\n",
|
||||
"|:-:|:-:|:--|:-:|\n",
|
||||
"|❌|❌|HTML, Markdown, or screenshot of web pages|Requires Bright Data account|\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The integration lives in the `langchain-brightdata` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install langchain-brightdata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You'll need a Bright Data API key to use this tool. You can set it as an environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"BRIGHT_DATA_API_KEY\"] = \"your-api-key\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Or pass it directly when initializing the tool:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataUnlocker\n",
|
||||
"\n",
|
||||
"unlocker_tool = BrightDataUnlocker(bright_data_api_key=\"your-api-key\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Here we show how to instantiate an instance of the BrightDataUnlocker tool. This tool allows you to access websites that may be protected by anti-bot measures, geo-restrictions, or other access limitations using Bright Data's Web Unlocker service.\n",
|
||||
"\n",
|
||||
"The tool accepts various parameters during instantiation:\n",
|
||||
"\n",
|
||||
"- `bright_data_api_key` (required, str): Your Bright Data API key for authentication.\n",
|
||||
"- `format` (optional, Literal[\"raw\"]): Format of the response content. Default is \"raw\".\n",
|
||||
"- `country` (optional, str): Two-letter country code for geo-specific access (e.g., \"us\", \"gb\", \"de\", \"jp\"). Set this when you need to view the website as if accessing from a specific country. Default is None.\n",
|
||||
"- `zone` (optional, str): Bright Data zone to use for the request. The \"unlocker\" zone is optimized for accessing websites that might block regular requests. Default is \"unlocker\".\n",
|
||||
"- `data_format` (optional, Literal[\"html\", \"markdown\", \"screenshot\"]): Output format for the retrieved content. Options include:\n",
|
||||
" - \"html\" - Returns the standard HTML content (default)\n",
|
||||
" - \"markdown\" - Returns content converted to markdown format\n",
|
||||
" - \"screenshot\" - Returns a PNG screenshot of the rendered page"
|
||||
]
|
||||
},
|
||||
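As referenced in the parameter list above, a minimal sketch that spells out the documented defaults explicitly; it should behave the same as passing only the API key.

```python
from langchain_brightdata import BrightDataUnlocker

# Equivalent to the defaults described above, written out explicitly.
unlocker_tool = BrightDataUnlocker(
    bright_data_api_key="your-api-key",
    format="raw",  # default response format
    zone="unlocker",  # default zone, tuned for protected sites
    country=None,  # no geo-targeting by default
)

result = unlocker_tool.invoke("https://example.com")
print(result)
```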
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataUnlocker\n",
|
||||
"\n",
|
||||
"# Initialize the tool\n",
|
||||
"unlocker_tool = BrightDataUnlocker(\n",
|
||||
" bright_data_api_key=\"your-api-key\" # Optional if set in environment variables\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Access a webpage\n",
|
||||
"result = unlocker_tool.invoke(\"https://example.com\")\n",
|
||||
"\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Advanced Usage with Parameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataUnlocker\n",
|
||||
"\n",
|
||||
"unlocker_tool = BrightDataUnlocker(\n",
|
||||
" bright_data_api_key=\"your-api-key\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Access a webpage with specific parameters\n",
|
||||
"result = unlocker_tool.invoke(\n",
|
||||
" {\n",
|
||||
" \"url\": \"https://example.com/region-restricted-content\",\n",
|
||||
" \"country\": \"gb\", # Access as if from Great Britain\n",
|
||||
" \"data_format\": \"html\", # Get content in markdown format\n",
|
||||
" \"zone\": \"unlocker\", # Use the unlocker zone\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customization Options"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The BrightDataUnlocker tool accepts several parameters for customization:\n",
|
||||
"\n",
|
||||
"|Parameter|Type|Description|\n",
|
||||
"|:--|:--|:--|\n",
|
||||
"|`url`|str|The URL to access|\n",
|
||||
"|`format`|str|Format of the response content (default: \"raw\")|\n",
|
||||
"|`country`|str|Two-letter country code for geo-specific access (e.g., \"us\", \"gb\")|\n",
|
||||
"|`zone`|str|Bright Data zone to use (default: \"unlocker\")|\n",
|
||||
"|`data_format`|str|Output format: None (HTML), \"markdown\", or \"screenshot\"|\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data Format Options\n",
|
||||
"\n",
|
||||
"The `data_format` parameter allows you to specify how the content should be returned:\n",
|
||||
"\n",
|
||||
"- `None` or `\"html\"` (default): Returns the standard HTML content of the page\n",
|
||||
"- `\"markdown\"`: Returns the content converted to markdown format, which is useful for feeding directly to LLMs\n",
|
||||
"- `\"screenshot\"`: Returns a PNG screenshot of the rendered page, useful for visual analysis"
|
||||
]
|
||||
},
|
||||
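As noted in the list above, a short sketch retrieving a screenshot and writing it to disk. It assumes the tool returns raw PNG bytes when `data_format="screenshot"`; if the API returns an encoded payload instead, the write step would need adjusting.

```python
from langchain_brightdata import BrightDataUnlocker

unlocker_tool = BrightDataUnlocker(bright_data_api_key="your-api-key")

# Assumption: raw PNG bytes are returned for data_format="screenshot".
screenshot = unlocker_tool.invoke(
    {"url": "https://example.com", "data_format": "screenshot"}
)

with open("page.png", "wb") as f:
    f.write(screenshot)
```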
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_brightdata import BrightDataUnlocker\n",
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"# Initialize the LLM\n",
|
||||
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\", google_api_key=\"your-api-key\")\n",
|
||||
"\n",
|
||||
"# Initialize the tool\n",
|
||||
"bright_data_tool = BrightDataUnlocker(bright_data_api_key=\"your-api-key\")\n",
|
||||
"\n",
|
||||
"# Create the agent\n",
|
||||
"agent = create_react_agent(llm, [bright_data_tool])\n",
|
||||
"\n",
|
||||
"# Input URLs or prompt\n",
|
||||
"user_input = \"Get the content from https://example.com/region-restricted-page - access it from GB\"\n",
|
||||
"\n",
|
||||
"# Stream the agent's output step by step\n",
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": user_input},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"- [Bright Data API Documentation](https://docs.brightdata.com/scraping-automation/web-unlocker/introduction)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -16,7 +16,7 @@
|
||||
"1. `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib`\n",
|
||||
"\n",
|
||||
"## Instructions for retrieving your Google Docs data\n",
|
||||
"By default, the `GoogleDriveTools` and `GoogleDriveWrapper` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable using the `GOOGLE_ACCOUNT_FILE` environment variable. \n",
|
||||
"By default, the `GoogleDriveTools` and `GoogleDriveWrapper` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable by setting the `GOOGLE_ACCOUNT_FILE` environment variable to your `custom/path/to/credentials.json`. \n",
|
||||
"The location of `token.json` use the same directory (or use the parameter `token_path`). Note that `token.json` will be created automatically the first time you use the tool.\n",
|
||||
"\n",
|
||||
"`GoogleDriveSearchTool` can retrieve a selection of files with some requests. \n",
|
||||
@@ -47,7 +47,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -88,7 +88,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet unstructured"
|
||||
"%pip install --upgrade --quiet unstructured langchain-googledrive"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -99,9 +99,13 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_googledrive.tools.google_drive.tool import GoogleDriveSearchTool\n",
|
||||
"from langchain_googledrive.utilities.google_drive import GoogleDriveAPIWrapper\n",
|
||||
"\n",
|
||||
"os.environ[\"GOOGLE_ACCOUNT_FILE\"] = \"custom/path/to/credentials.json\"\n",
|
||||
"\n",
|
||||
"# By default, search only in the filename.\n",
|
||||
"tool = GoogleDriveSearchTool(\n",
|
||||
" api_wrapper=GoogleDriveAPIWrapper(\n",
|
||||
@@ -114,7 +118,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -134,33 +138,52 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"A wrapper around Google Drive Search. Useful for when you need to find a document in google drive. The input should be formatted as a list of entities separated with a space. As an example, a list of keywords is 'hello word'.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tool.description"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"\n",
|
||||
"tools = load_tools(\n",
|
||||
" [\"google-drive-search\"],\n",
|
||||
" folder_id=folder_id,\n",
|
||||
" template=\"gdrive-query-in-folder\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an Agent"
|
||||
"## Use the tool within a ReAct agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to create an agent that uses the Google Jobs tool install Langgraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langgraph langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"and use the `create_react_agent` functionality to initialize a ReAct agent. You will also need to set up your OPEN_API_KEY (visit https://platform.openai.com) in order to access OpenAI's chat models."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -171,32 +194,29 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=tools,\n",
|
||||
" llm=llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(\"Search in google drive, who is 'Yann LeCun' ?\")"
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"your-openai-api-key\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm = init_chat_model(\"gpt-4o-mini\", model_provider=\"openai\", temperature=0)\n",
|
||||
"agent = create_react_agent(llm, tools=[tool])\n",
|
||||
"\n",
|
||||
"events = agent.stream(\n",
|
||||
" {\"messages\": [(\"user\", \"Search in google drive, who is 'Yann LeCun' ?\")]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -210,7 +230,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -6,55 +6,76 @@
|
||||
"source": [
|
||||
"# Google Finance\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the Google Finance Tool to get information from the Google Finance page\n",
|
||||
"This notebook goes over how to use the Google Finance Tool to get information from the Google Finance page.\n",
|
||||
"\n",
|
||||
"To get an SerpApi key key, sign up at: https://serpapi.com/users/sign_up.\n",
|
||||
"\n",
|
||||
"Then install google-search-results with the command: \n",
|
||||
"\n",
|
||||
"pip install google-search-results\n",
|
||||
"\n",
|
||||
"Then set the environment variable SERPAPI_API_KEY to your SerpApi key\n",
|
||||
"\n",
|
||||
"Or pass the key in as a argument to the wrapper serp_api_key=\"your secret key\""
|
||||
"To use the tool with Langchain install following packages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet google-search-results langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use the Tool"
|
||||
"Then set the environment variable SERPAPI_API_KEY to your SerpApi key or pass the key in as a argument to the wrapper serp_api_key=\"your secret key\"."
|
||||
]
|
||||
},
|
||||
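A minimal sketch of the second option, passing the key directly to the wrapper; it assumes the wrapper accepts the `serp_api_key` argument exactly as named above.

```python
from langchain_community.tools.google_finance import GoogleFinanceQueryRun
from langchain_community.utilities.google_finance import GoogleFinanceAPIWrapper

# Pass the SerpApi key directly instead of via the SERPAPI_API_KEY
# environment variable (argument name as stated above).
tool = GoogleFinanceQueryRun(
    api_wrapper=GoogleFinanceAPIWrapper(serp_api_key="your secret key")
)
```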
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet google-search-results langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"SERPAPI_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.tools.google_finance import GoogleFinanceQueryRun\n",
|
||||
"from langchain_community.utilities.google_finance import GoogleFinanceAPIWrapper\n",
|
||||
"\n",
|
||||
"os.environ[\"SERPAPI_API_KEY\"] = \"\"\n",
|
||||
"tool = GoogleFinanceQueryRun(api_wrapper=GoogleFinanceAPIWrapper())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\nQuery: Google\\nstock: GOOGL:NASDAQ\\nprice: $159.96\\npercentage: 0.94\\nmovement: Up\\nus: price = 42210.57, movement = Down\\neurope: price = 23638.56, movement = Up\\nasia: price = 38183.26, movement = Up\\n'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tool.run(\"Google\")"
|
||||
]
|
||||
@@ -63,7 +84,115 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Using it with Langchain"
|
||||
"In order to create an agent that uses the Google Finance tool install Langgraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langgraph langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"and use the `create_react_agent` functionality to initialize a ReAct agent. You will also need to set up your OPEN_API_KEY (visit https://platform.openai.com) in order to access OpenAI's chat models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"SERP_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"\n",
|
||||
"llm = init_chat_model(\"gpt-4o-mini\", model_provider=\"openai\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.load_tools import load_tools\n",
|
||||
"\n",
|
||||
"tools = load_tools([\"google-scholar\", \"google-finance\"], llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"What is Google's stock?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" google_finance (call_8m0txCtxNuQaAv9UlomPhSA1)\n",
|
||||
" Call ID: call_8m0txCtxNuQaAv9UlomPhSA1\n",
|
||||
" Args:\n",
|
||||
" query: Google\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: google_finance\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Query: Google\n",
|
||||
"stock: GOOGL:NASDAQ\n",
|
||||
"price: $159.96\n",
|
||||
"percentage: 0.94\n",
|
||||
"movement: Up\n",
|
||||
"us: price = 42210.57, movement = Down\n",
|
||||
"europe: price = 23638.56, movement = Up\n",
|
||||
"asia: price = 38183.26, movement = Up\n",
|
||||
"\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"Google's stock, listed as GOOGL on NASDAQ, is currently priced at $159.96, with a movement up by 0.94%.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"agent = create_react_agent(llm, tools)\n",
|
||||
"\n",
|
||||
"events = agent.stream(\n",
|
||||
" {\"messages\": [(\"user\", \"What is Google's stock?\")]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -71,26 +200,12 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"SERP_API_KEY\"] = \"\"\n",
|
||||
"llm = OpenAI()\n",
|
||||
"tools = load_tools([\"google-scholar\", \"google-finance\"], llm=llm)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")\n",
|
||||
"agent.run(\"what is google's stock\")"
|
||||
]
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -104,9 +219,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.5"
|
||||
"version": "3.12.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
@@ -12,33 +12,49 @@
|
||||
"\n",
|
||||
"This Jupyter Notebook demonstrates how to use the `GraphQLAPIWrapper` component with an Agent.\n",
|
||||
"\n",
|
||||
"In this example, we'll be using the public `Star Wars GraphQL API` available at the following endpoint: https://swapi-graphql.netlify.app/.netlify/functions/index.\n",
|
||||
"In this example, we'll be using the public `Star Wars GraphQL API` available at the following endpoint: https://swapi-graphql.netlify.app/graphql .\n",
|
||||
"\n",
|
||||
"First, you need to install `httpx` and `gql` Python packages."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "shellscript"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pip install httpx gql > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "shellscript"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-community"
|
||||
]
|
||||
@@ -56,21 +72,36 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.load_tools import load_tools\n",
|
||||
"\n",
|
||||
"tools = load_tools(\n",
|
||||
" [\"graphql\"],\n",
|
||||
" graphql_endpoint=\"https://swapi-graphql.netlify.app/.netlify/functions/index\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
" graphql_endpoint=\"https://swapi-graphql.netlify.app/graphql\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -80,35 +111,55 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Search for the titles of all the stawars films stored in the graphql database that has this schema allFilms {\n",
|
||||
" films {\n",
|
||||
" title\n",
|
||||
" director\n",
|
||||
" releaseDate\n",
|
||||
" speciesConnection {\n",
|
||||
" species {\n",
|
||||
" name\n",
|
||||
" classification\n",
|
||||
" homeworld {\n",
|
||||
" name\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to query the graphql database to get the titles of all the star wars films\n",
|
||||
"Action: query_graphql\n",
|
||||
"Action Input: query { allFilms { films { title } } }\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\"{\\n \\\"allFilms\\\": {\\n \\\"films\\\": [\\n {\\n \\\"title\\\": \\\"A New Hope\\\"\\n },\\n {\\n \\\"title\\\": \\\"The Empire Strikes Back\\\"\\n },\\n {\\n \\\"title\\\": \\\"Return of the Jedi\\\"\\n },\\n {\\n \\\"title\\\": \\\"The Phantom Menace\\\"\\n },\\n {\\n \\\"title\\\": \\\"Attack of the Clones\\\"\\n },\\n {\\n \\\"title\\\": \\\"Revenge of the Sith\\\"\\n }\\n ]\\n }\\n}\"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the titles of all the star wars films\n",
|
||||
"Final Answer: The titles of all the star wars films are: A New Hope, The Empire Strikes Back, Return of the Jedi, The Phantom Menace, Attack of the Clones, and Revenge of the Sith.\u001b[0m\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" query_graphql (call_tN5A0dBbfOMewuw8Yy13bYpW)\n",
|
||||
" Call ID: call_tN5A0dBbfOMewuw8Yy13bYpW\n",
|
||||
" Args:\n",
|
||||
" tool_input: query { allFilms { films { title } } }\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: query_graphql\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"\"{\\n \\\"allFilms\\\": {\\n \\\"films\\\": [\\n {\\n \\\"title\\\": \\\"A New Hope\\\"\\n },\\n {\\n \\\"title\\\": \\\"The Empire Strikes Back\\\"\\n },\\n {\\n \\\"title\\\": \\\"Return of the Jedi\\\"\\n },\\n {\\n \\\"title\\\": \\\"The Phantom Menace\\\"\\n },\\n {\\n \\\"title\\\": \\\"Attack of the Clones\\\"\\n },\\n {\\n \\\"title\\\": \\\"Revenge of the Sith\\\"\\n }\\n ]\\n }\\n}\"\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The titles of all the Star Wars films stored in the database are:\n",
|
||||
"1. A New Hope\n",
|
||||
"2. The Empire Strikes Back\n",
|
||||
"3. Return of the Jedi\n",
|
||||
"4. The Phantom Menace\n",
|
||||
"5. Attack of the Clones\n",
|
||||
"6. Revenge of the Sith\n",
|
||||
"\n",
|
||||
"If you would like more information about any of these films, please let me know!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The titles of all the star wars films are: A New Hope, The Empire Strikes Back, Return of the Jedi, The Phantom Menace, Attack of the Clones, and Revenge of the Sith.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -133,9 +184,24 @@
|
||||
"\n",
|
||||
"suffix = \"Search for the titles of all the stawars films stored in the graphql database that has this schema \"\n",
|
||||
"\n",
|
||||
"input_message = {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": suffix + graphql_fields,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"agent.run(suffix + graphql_fields)"
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": [input_message]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
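Beyond the agent flow above, a minimal sketch of querying the same endpoint directly with `GraphQLAPIWrapper`, which the `graphql` tool wraps; this assumes `run()` takes the raw query string and returns the JSON response serialized as a string.

```python
from langchain_community.utilities.graphql import GraphQLAPIWrapper

wrapper = GraphQLAPIWrapper(
    graphql_endpoint="https://swapi-graphql.netlify.app/graphql"
)

# Assumption: run() accepts a raw GraphQL query string and returns the
# JSON response as a serialized string.
result = wrapper.run("query { allFilms { films { title } } }")
print(result)
```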
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -157,7 +223,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.12.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -22,8 +22,26 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "34bb5968",
|
||||
"execution_count": 1,
|
||||
"id": "8b81a74e-db10-4e8d-9f90-83219df30ab3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet pyowm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "78ab9fcd-bb7b-434b-9a38-0a9249e35768",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -38,8 +56,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ac4910f8",
|
||||
"execution_count": 3,
|
||||
"id": "0a8aa4b0-6aea-4172-9546-361e127a4a02",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -47,17 +65,17 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"In London,GB, the current weather is as follows:\n",
|
||||
"Detailed status: broken clouds\n",
|
||||
"Wind speed: 2.57 m/s, direction: 240°\n",
|
||||
"Humidity: 55%\n",
|
||||
"Detailed status: overcast clouds\n",
|
||||
"Wind speed: 4.12 m/s, direction: 10°\n",
|
||||
"Humidity: 51%\n",
|
||||
"Temperature: \n",
|
||||
" - Current: 20.12°C\n",
|
||||
" - High: 21.75°C\n",
|
||||
" - Low: 18.68°C\n",
|
||||
" - Feels like: 19.62°C\n",
|
||||
" - Current: 12.82°C\n",
|
||||
" - High: 13.98°C\n",
|
||||
" - Low: 12.01°C\n",
|
||||
" - Feels like: 11.49°C\n",
|
||||
"Rain: {}\n",
|
||||
"Heat index: None\n",
|
||||
"Cloud cover: 75%\n"
|
||||
"Cloud cover: 100%\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -76,76 +94,82 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "b3367417",
|
||||
"execution_count": 4,
|
||||
"id": "402c832c-87c7-4088-b80f-ec1924a43796",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"OPENWEATHERMAP_API_KEY\"] = \"\"\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"tools = load_tools([\"openweathermap-api\"], llm)\n",
|
||||
"\n",
|
||||
"agent_chain = initialize_agent(\n",
|
||||
" tools=tools, llm=llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")"
|
||||
"tools = [weather.run]\n",
|
||||
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "bf4f6854",
|
||||
"execution_count": 5,
|
||||
"id": "9b423a92-1568-4ee2-9c7d-3b9acf7756a1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"What's the weather like in London?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" run (call_6vPq9neyy7oOnht29ExidE2g)\n",
|
||||
" Call ID: call_6vPq9neyy7oOnht29ExidE2g\n",
|
||||
" Args:\n",
|
||||
" location: London\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: run\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out the current weather in London.\n",
|
||||
"Action: OpenWeatherMap\n",
|
||||
"Action Input: London,GB\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mIn London,GB, the current weather is as follows:\n",
|
||||
"Detailed status: broken clouds\n",
|
||||
"Wind speed: 2.57 m/s, direction: 240°\n",
|
||||
"Humidity: 56%\n",
|
||||
"In London, the current weather is as follows:\n",
|
||||
"Detailed status: overcast clouds\n",
|
||||
"Wind speed: 4.12 m/s, direction: 10°\n",
|
||||
"Humidity: 51%\n",
|
||||
"Temperature: \n",
|
||||
" - Current: 20.11°C\n",
|
||||
" - High: 21.75°C\n",
|
||||
" - Low: 18.68°C\n",
|
||||
" - Feels like: 19.64°C\n",
|
||||
" - Current: 12.82°C\n",
|
||||
" - High: 13.98°C\n",
|
||||
" - Low: 12.01°C\n",
|
||||
" - Feels like: 11.49°C\n",
|
||||
"Rain: {}\n",
|
||||
"Heat index: None\n",
|
||||
"Cloud cover: 75%\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the current weather in London.\n",
|
||||
"Final Answer: The current weather in London is broken clouds, with a wind speed of 2.57 m/s, direction 240°, humidity of 56%, temperature of 20.11°C, high of 21.75°C, low of 18.68°C, and a heat index of None.\u001b[0m\n",
|
||||
"Cloud cover: 100%\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"The weather in London is currently overcast with 100% cloud cover. The temperature is around 12.82°C, feeling like 11.49°C. The wind is blowing at 4.12 m/s from the direction of 10°. Humidity is at 51%. The high for the day is 13.98°C, and the low is 12.01°C.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The current weather in London is broken clouds, with a wind speed of 2.57 m/s, direction 240°, humidity of 56%, temperature of 20.11°C, high of 21.75°C, low of 18.68°C, and a heat index of None.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_chain.run(\"What's the weather like in London?\")"
|
||||
"input_message = {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"What's the weather like in London?\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": [input_message]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f2af226a-9cca-468d-b07f-0a928ea61f48",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -164,7 +188,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 1,
"id": "70871a99-ffee-47d7-8e02-82eb99971f28",
"metadata": {},
"outputs": [],
@@ -51,7 +51,7 @@
{
"data": {
"text/plain": [
"'Barack Hussein Obama II'"
"'Barack Obama Full name: Barack Hussein Obama II'"
]
},
"execution_count": 4,
@@ -73,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 5,
"id": "17a9b1ad-6e84-4949-8ebd-8c52f6b296e3",
"metadata": {},
"outputs": [],
@@ -83,48 +83,11 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "cf8970a5-00e1-46bd-ba53-6a974eebbc10",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m Yes.\n",
"Follow up: How old was Plato when he died?\u001b[0m\n",
"Intermediate answer: \u001b[36;1m\u001b[1;3meighty\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mFollow up: How old was Socrates when he died?\u001b[0m\n",
"Intermediate answer: \u001b[36;1m\u001b[1;3m| Socrates | \n",
"| -------- | \n",
"| Born | c. 470 BC Deme Alopece, Athens | \n",
"| Died | 399 BC (aged approximately 71) Athens | \n",
"| Cause of death | Execution by forced suicide by poisoning | \n",
"| Spouse(s) | Xanthippe, Myrto | \n",
"\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mFollow up: How old was Aristotle when he died?\u001b[0m\n",
"Intermediate answer: \u001b[36;1m\u001b[1;3m62 years\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mSo the final answer is: Plato\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'Plato'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"from langchain.agents import AgentType, initialize_agent\n",
"from langchain_community.utilities import SearchApiAPIWrapper\n",
"from langchain_core.tools import Tool\n",
"from langchain_openai import OpenAI\n",
@@ -133,16 +96,88 @@
"search = SearchApiAPIWrapper()\n",
"tools = [\n",
" Tool(\n",
" name=\"Intermediate Answer\",\n",
" name=\"intermediate_answer\",\n",
" func=search.run,\n",
" description=\"useful for when you need to ask with search\",\n",
" )\n",
"]\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4198dda8-b7a9-4ae9-bcb6-b95e2c7681b9",
"metadata": {},
"outputs": [],
"source": [
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"self_ask_with_search = initialize_agent(\n",
" tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True\n",
")\n",
"self_ask_with_search.run(\"Who lived longer: Plato, Socrates, or Aristotle?\")"
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c24ad140-d41f-4e99-a42f-11371c3897b5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"Who lived longer: Plato, Socrates, or Aristotle?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" intermediate_answer (call_Q0JquDV3SWfnn3rkwJkJaffG)\n",
" Call ID: call_Q0JquDV3SWfnn3rkwJkJaffG\n",
" Args:\n",
" __arg1: Lifespan of Plato\n",
" intermediate_answer (call_j9rXzVlrCcGc8HOFnKUH6j5E)\n",
" Call ID: call_j9rXzVlrCcGc8HOFnKUH6j5E\n",
" Args:\n",
" __arg1: Lifespan of Socrates\n",
" intermediate_answer (call_IBQT2qn5PzDE6q0ZyfPdhRaX)\n",
" Call ID: call_IBQT2qn5PzDE6q0ZyfPdhRaX\n",
" Args:\n",
" __arg1: Lifespan of Aristotle\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: intermediate_answer\n",
"\n",
"384–322 BC was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, ...\n",
"The Greek philosopher Aristotle (384-322 B.C.) made significant and lasting contributions to nearly every aspect of human knowledge, ...\n",
"Aristotle's lifespan (384 - 322) (jan 1, 384 BC – jan 1, 322 BC). Added to timeline: Political Philosophy timeline. ByEdoardo. 25 Aug 2020.\n",
"Aristotle was one of the greatest philosophers and scientists the world has ever seen. He was born in 384 bc at Stagirus, a Greek seaport on the coast of Thrace ...\n",
"393–c. 370 bce), king of Macedonia and grandfather of Alexander the Great (reigned 336–323 bce). After his father's death in 367, Aristotle ...\n",
"It is difficult to rule out that possibility decisively, since little is known about the period of Aristotle's life from 341–335. He evidently ...\n",
"Lifespan: c. 384 B.C. to 322 B.C.; Contributions: Considered one of the greatest thinkers in various fields including politics, psychology, and ...\n",
"Aristotle (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]) lived 384–322 BC.\n",
"Aristotle (384 B.C.E.—322 B.C.E.). Aristotle is a towering figure in ancient Greek philosophy, who made important contributions to logic, criticism, ...\n",
"Aristotle. Born: 384 BC in Stagirus, Macedonia, Greece Died: 322 BC in Chalcis, Euboea, Greece. Aristotle was not primarily a mathematician but made ...\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"Based on the information:\n",
"\n",
"- Plato reportedly lived to be around eighty or eighty-one years old.\n",
"- Socrates' exact lifespan is not directly stated here, but he is known historically to have lived approximately from 470 BC to 399 BC, making him around 71 years old.\n",
"- Aristotle lived from 384 BC to 322 BC, which means he was about 62 years old.\n",
"\n",
"Therefore, Plato lived longer than both Socrates and Aristotle.\n"
]
}
],
"source": [
"input_message = {\n",
" \"role\": \"user\",\n",
" \"content\": \"Who lived longer: Plato, Socrates, or Aristotle?\",\n",
"}\n",
"\n",
"for step in agent.stream(\n",
" {\"messages\": [input_message]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
},
{
@@ -157,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "6d0b4411-780a-4dcf-91b6-f3544e31e532",
"metadata": {},
"outputs": [],
@@ -167,17 +202,17 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "34e79449-6b33-4b45-9306-7e3dab1b8599",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Azure AI Engineer Be an XpanderCandidatar-meCandidatar-meCandidatar-me\\n\\nShare:\\n\\nAzure AI Engineer\\n\\nA área Digital Xperience da Xpand IT é uma equipa tecnológica de rápido crescimento que se concentra em tecnologias Microsoft e Mobile. A sua principal missão é fornecer soluções de software de alta qualidade que atendam às necessidades do utilizador final, num mundo tecnológico continuamente exigente e em ritmo acelerado, proporcionando a melhor experiência em termos de personalização, performance'"
"'No good search result found'"
]
},
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -196,7 +231,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "b16b7cd9-f0fe-4030-a36b-bbb52b19da18",
"metadata": {},
"outputs": [],
@@ -206,7 +241,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "e8adb325-2ad0-4a39-9bc2-d220ec3a29be",
"metadata": {},
"outputs": [
@@ -214,22 +249,22 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'search_metadata': {'id': 'search_qVdXG2jzvrlqTzayeYoaOb8A',\n",
"{'search_metadata': {'id': 'search_6Lpb2Z8vDqdsPRbrGkVgQzRy',\n",
" 'status': 'Success',\n",
" 'created_at': '2023-09-25T15:22:30Z',\n",
" 'request_time_taken': 3.21,\n",
" 'parsing_time_taken': 0.03,\n",
" 'total_time_taken': 3.24,\n",
" 'created_at': '2025-05-11T03:39:28Z',\n",
" 'request_time_taken': 0.86,\n",
" 'parsing_time_taken': 0.01,\n",
" 'total_time_taken': 0.87,\n",
" 'request_url': 'https://scholar.google.com/scholar?q=Large+Language+Models&hl=en',\n",
" 'html_url': 'https://www.searchapi.io/api/v1/searches/search_qVdXG2jzvrlqTzayeYoaOb8A.html',\n",
" 'json_url': 'https://www.searchapi.io/api/v1/searches/search_qVdXG2jzvrlqTzayeYoaOb8A'},\n",
" 'html_url': 'https://www.searchapi.io/api/v1/searches/search_6Lpb2Z8vDqdsPRbrGkVgQzRy.html',\n",
" 'json_url': 'https://www.searchapi.io/api/v1/searches/search_6Lpb2Z8vDqdsPRbrGkVgQzRy'},\n",
" 'search_parameters': {'engine': 'google_scholar',\n",
" 'q': 'Large Language Models',\n",
" 'hl': 'en'},\n",
" 'search_information': {'query_displayed': 'Large Language Models',\n",
" 'total_results': 6420000,\n",
" 'total_results': 6390000,\n",
" 'page': 1,\n",
" 'time_taken_displayed': 0.06},\n",
" 'time_taken_displayed': 0.08},\n",
" 'organic_results': [{'position': 1,\n",
" 'title': 'ChatGPT for good? On opportunities and '\n",
" 'challenges of large language models for '\n",
@@ -245,15 +280,15 @@
" 'we argue that large language models in '\n",
" 'education require …',\n",
" 'inline_links': {'cited_by': {'cites_id': '8166055256995715258',\n",
" 'total': 410,\n",
" 'link': 'https://scholar.google.com/scholar?cites=8166055256995715258&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'total': 4675,\n",
" 'link': 'https://scholar.google.com/scholar?cites=8166055256995715258&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '8166055256995715258',\n",
" 'total': 10,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=8166055256995715258&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:uthwmf2nU3EJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'edarxiv.org',\n",
" 'total': 16,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=8166055256995715258&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:uthwmf2nU3EJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'osf.io',\n",
" 'format': 'PDF',\n",
" 'link': 'https://edarxiv.org/5er8f/download?format=pdf'},\n",
" 'link': 'https://osf.io/preprints/edarxiv/5er8f/download'},\n",
" 'authors': [{'name': 'E Kasneci',\n",
" 'id': 'bZVkVvoAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=bZVkVvoAAAAJ&hl=en&oi=sra'},\n",
@@ -267,6 +302,82 @@
" 'id': 'TjfQ8QkAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=TjfQ8QkAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 2,\n",
" 'title': 'A survey on evaluation of large language '\n",
" 'models',\n",
" 'data_cid': 'o93zfHYlUTIJ',\n",
" 'link': 'https://dl.acm.org/doi/abs/10.1145/3641289',\n",
" 'publication': 'Y Chang, X Wang, J Wang, Y Wu, L Yang… - '\n",
" 'ACM transactions on …, 2024 - dl.acm.org',\n",
" 'snippet': '… 3.1 Natural Language Processing Tasks … '\n",
" 'the development of language models, '\n",
" 'particularly large language models, was to '\n",
" 'enhance performance on natural language '\n",
" 'processing tasks, …',\n",
" 'inline_links': {'cited_by': {'cites_id': '3625720365842685347',\n",
" 'total': 2864,\n",
" 'link': 'https://scholar.google.com/scholar?cites=3625720365842685347&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '3625720365842685347',\n",
" 'total': 8,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=3625720365842685347&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:o93zfHYlUTIJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'acm.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://dl.acm.org/doi/pdf/10.1145/3641289'},\n",
" 'authors': [{'name': 'Y Chang',\n",
" 'id': 'Hw-lrpAAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Hw-lrpAAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'X Wang',\n",
" 'id': 'Q7Ieos8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Q7Ieos8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Wang',\n",
" 'id': 'hBZ_tKsAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=hBZ_tKsAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Y Wu',\n",
" 'id': 'KVeRu2QAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=KVeRu2QAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'L Yang',\n",
" 'id': 'go3sFxcAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=go3sFxcAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 3,\n",
" 'title': 'A comprehensive overview of large language '\n",
" 'models',\n",
" 'data_cid': 'UDLkJGuOVl4J',\n",
" 'link': 'https://arxiv.org/abs/2307.06435',\n",
" 'publication': 'H Naveed, AU Khan, S Qiu, M Saqib, S '\n",
" 'Anwar… - arXiv preprint arXiv …, 2023 - '\n",
" 'arxiv.org',\n",
" 'snippet': '… Large Language Models (LLMs) have recently '\n",
" 'demonstrated remarkable capabilities in '\n",
" 'natural language processing tasks and '\n",
" 'beyond. This success of LLMs has led to a '\n",
" 'large influx of …',\n",
" 'inline_links': {'cited_by': {'cites_id': '6797777278393922128',\n",
" 'total': 990,\n",
" 'link': 'https://scholar.google.com/scholar?cites=6797777278393922128&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '6797777278393922128',\n",
" 'total': 4,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=6797777278393922128&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:UDLkJGuOVl4J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:UDLkJGuOVl4J:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2307.06435'},\n",
" 'authors': [{'name': 'H Naveed',\n",
" 'id': 'k5dpooQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=k5dpooQAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'AU Khan',\n",
" 'id': 'sbOhz2UAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=sbOhz2UAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'S Qiu',\n",
" 'id': 'OPNVthUAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=OPNVthUAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'M Saqib',\n",
" 'id': 'KvbLR3gAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=KvbLR3gAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'S Anwar',\n",
" 'id': 'vPJIHywAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=vPJIHywAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 4,\n",
" 'title': 'Large language models in medicine',\n",
" 'data_cid': 'Ph9AwHTmhzAJ',\n",
" 'link': 'https://www.nature.com/articles/s41591-023-02448-8',\n",
@@ -279,11 +390,15 @@
" '(LLaMA) as its backend model 30 . Finally, '\n",
" 'cheap imitations of …',\n",
" 'inline_links': {'cited_by': {'cites_id': '3497017024792502078',\n",
" 'total': 25,\n",
" 'link': 'https://scholar.google.com/scholar?cites=3497017024792502078&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'total': 2474,\n",
" 'link': 'https://scholar.google.com/scholar?cites=3497017024792502078&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '3497017024792502078',\n",
" 'total': 3,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=3497017024792502078&hl=en&as_sdt=0,33'}},\n",
" 'total': 7,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=3497017024792502078&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:Ph9AwHTmhzAJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'google.com',\n",
" 'format': 'PDF',\n",
" 'link': 'https://drive.google.com/file/d/1FKEGsSZ9GYOeToeKpxB4m3atGRbC-TSm/view'},\n",
" 'authors': [{'name': 'AJ Thirunavukarasu',\n",
" 'id': '3qb1AYwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=3qb1AYwAAAAJ&hl=en&oi=sra'},\n",
@@ -293,43 +408,132 @@
" {'name': 'K Elangovan',\n",
" 'id': 'BE_lVTQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=BE_lVTQAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 3,\n",
" 'title': 'Extracting training data from large language '\n",
" 'models',\n",
" 'data_cid': 'mEYsWK6bWKoJ',\n",
" 'link': 'https://www.usenix.org/conference/usenixsecurity21/presentation/carlini-extracting',\n",
" 'publication': 'N Carlini, F Tramer, E Wallace, M '\n",
" 'Jagielski… - 30th USENIX Security …, '\n",
" '2021 - usenix.org',\n",
" 'snippet': '… language model trained on scrapes of the '\n",
" 'public Internet, and are able to extract '\n",
" 'hundreds of verbatim text sequences from the '\n",
" 'model’… models are more vulnerable than '\n",
" 'smaller models. …',\n",
" 'inline_links': {'cited_by': {'cites_id': '12274731957504198296',\n",
" 'total': 742,\n",
" 'link': 'https://scholar.google.com/scholar?cites=12274731957504198296&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '12274731957504198296',\n",
" 'total': 8,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=12274731957504198296&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:mEYsWK6bWKoJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:mEYsWK6bWKoJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'usenix.org',\n",
" {'position': 5,\n",
" 'title': 'A watermark for large language models',\n",
" 'data_cid': 'BlSyLHT4iiEJ',\n",
" 'link': 'https://proceedings.mlr.press/v202/kirchenbauer23a.html',\n",
" 'publication': 'J Kirchenbauer, J Geiping, Y Wen… - '\n",
" 'International …, 2023 - '\n",
" 'proceedings.mlr.press',\n",
" 'snippet': '… We propose a watermarking framework for '\n",
" 'proprietary language models. The … in the '\n",
" 'language model just before it produces a '\n",
" 'probability vector. The last layer of the '\n",
" 'language model …',\n",
" 'inline_links': {'cited_by': {'cites_id': '2417017327887471622',\n",
" 'total': 774,\n",
" 'link': 'https://scholar.google.com/scholar?cites=2417017327887471622&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '2417017327887471622',\n",
" 'total': 13,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=2417017327887471622&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:BlSyLHT4iiEJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:BlSyLHT4iiEJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'mlr.press',\n",
" 'format': 'PDF',\n",
" 'link': 'https://www.usenix.org/system/files/sec21-carlini-extracting.pdf'},\n",
" 'authors': [{'name': 'N Carlini',\n",
" 'id': 'q4qDvAoAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=q4qDvAoAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'F Tramer',\n",
" 'id': 'ijH0-a8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=ijH0-a8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'E Wallace',\n",
" 'id': 'SgST3LkAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=SgST3LkAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'M Jagielski',\n",
" 'id': '_8rw_GMAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=_8rw_GMAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 4,\n",
" 'link': 'https://proceedings.mlr.press/v202/kirchenbauer23a/kirchenbauer23a.pdf'},\n",
" 'authors': [{'name': 'J Kirchenbauer',\n",
" 'id': '48GJrbsAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=48GJrbsAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Geiping',\n",
" 'id': '206vNCEAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=206vNCEAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Y Wen',\n",
" 'id': 'oUYfjg0AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=oUYfjg0AAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 6,\n",
" 'title': 'Welcome to the era of chatgpt et al. the '\n",
" 'prospects of large language models',\n",
" 'data_cid': '3UrgC1BmpV8J',\n",
" 'link': 'https://link.springer.com/article/10.1007/s12599-023-00795-x',\n",
" 'publication': 'T Teubner, CM Flath, C Weinhardt… - '\n",
" 'Business & Information …, 2023 - '\n",
" 'Springer',\n",
" 'snippet': 'The emergence of Large Language Models '\n",
" '(LLMs) in combination with easy-to-use '\n",
" 'interfaces such as ChatGPT, Bing Chat, and '\n",
" 'Google’s Bard represent both a Herculean '\n",
" 'task and a …',\n",
" 'inline_links': {'cited_by': {'cites_id': '6892027298743077597',\n",
" 'total': 409,\n",
" 'link': 'https://scholar.google.com/scholar?cites=6892027298743077597&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '6892027298743077597',\n",
" 'total': 16,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=6892027298743077597&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:3UrgC1BmpV8J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'springer.com',\n",
" 'format': 'PDF',\n",
" 'link': 'https://link.springer.com/content/pdf/10.1007/s12599-023-00795-x.pdf'},\n",
" 'authors': [{'name': 'T Teubner',\n",
" 'id': 'ZeCM1k8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=ZeCM1k8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'CM Flath',\n",
" 'id': '5Iy85HsAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=5Iy85HsAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'C Weinhardt',\n",
" 'id': 'lhfZxjAAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=lhfZxjAAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 7,\n",
" 'title': 'Talking about large language models',\n",
" 'data_cid': '3eYYI745r_0J',\n",
" 'link': 'https://dl.acm.org/doi/abs/10.1145/3624724',\n",
" 'publication': 'M Shanahan - Communications of the ACM, '\n",
" '2024 - dl.acm.org',\n",
" 'snippet': '… Recently, it has become commonplace to use '\n",
" 'the term “large language model” both for the '\n",
" 'generative models themselves and for the '\n",
" 'systems in which they are embedded, '\n",
" 'especially in …',\n",
" 'inline_links': {'cited_by': {'cites_id': '18279892901315536605',\n",
" 'total': 477,\n",
" 'link': 'https://scholar.google.com/scholar?cites=18279892901315536605&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '18279892901315536605',\n",
" 'total': 4,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=18279892901315536605&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:3eYYI745r_0J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'acm.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://dl.acm.org/doi/pdf/10.1145/3624724'},\n",
" 'authors': [{'name': 'M Shanahan',\n",
" 'id': '00bnGpAAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=00bnGpAAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 8,\n",
" 'title': 'Explainability for large language models: A '\n",
" 'survey',\n",
" 'data_cid': '0AqRKEINMw4J',\n",
" 'link': 'https://dl.acm.org/doi/abs/10.1145/3639372',\n",
" 'publication': 'H Zhao, H Chen, F Yang, N Liu, H Deng, H '\n",
" 'Cai… - ACM Transactions on …, 2024 - '\n",
" 'dl.acm.org',\n",
" 'snippet': '… Let us consider a scenario where we have a '\n",
" 'language model and we input a specific text '\n",
" 'into the model. The model then produces a '\n",
" 'classification output, such as sentiment …',\n",
" 'inline_links': {'cited_by': {'cites_id': '1023176118142831312',\n",
" 'total': 576,\n",
" 'link': 'https://scholar.google.com/scholar?cites=1023176118142831312&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '1023176118142831312',\n",
" 'total': 7,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=1023176118142831312&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:0AqRKEINMw4J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'acm.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://dl.acm.org/doi/pdf/10.1145/3639372'},\n",
" 'authors': [{'name': 'H Zhao',\n",
" 'id': '9FobigIAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=9FobigIAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'H Chen',\n",
" 'id': 'DyYOgLwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=DyYOgLwAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'F Yang',\n",
" 'id': 'RXFeW-8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=RXFeW-8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'N Liu',\n",
" 'id': 'Nir-EDYAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Nir-EDYAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'H Cai',\n",
" 'id': 'Kz-r34UAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Kz-r34UAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 9,\n",
" 'title': 'Emergent abilities of large language models',\n",
" 'data_cid': 'hG0iVOrOguoJ',\n",
" 'link': 'https://arxiv.org/abs/2206.07682',\n",
@@ -341,16 +545,16 @@
" 'efficiency on a wide range of downstream '\n",
" 'tasks. This paper instead discusses an …',\n",
" 'inline_links': {'cited_by': {'cites_id': '16898296257676733828',\n",
" 'total': 621,\n",
" 'link': 'https://scholar.google.com/scholar?cites=16898296257676733828&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'total': 3436,\n",
" 'link': 'https://scholar.google.com/scholar?cites=16898296257676733828&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '16898296257676733828',\n",
" 'total': 12,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=16898296257676733828&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:hG0iVOrOguoJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:hG0iVOrOguoJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'total': 11,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=16898296257676733828&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:hG0iVOrOguoJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:hG0iVOrOguoJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2206.07682.pdf?trk=cndc-detail'},\n",
" 'link': 'https://arxiv.org/pdf/2206.07682'},\n",
" 'authors': [{'name': 'J Wei',\n",
" 'id': 'wA5TK_0AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=wA5TK_0AAAAJ&hl=en&oi=sra'},\n",
@@ -362,232 +566,78 @@
" 'link': 'https://scholar.google.com/citations?user=WMBXw1EAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'C Raffel',\n",
" 'id': 'I66ZBYwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=I66ZBYwAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'B Zoph',\n",
" 'id': 'NL_7iTwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=NL_7iTwAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 5,\n",
" 'title': 'A survey on evaluation of large language '\n",
" 'models',\n",
" 'data_cid': 'ZYohnzOz-XgJ',\n",
" 'link': 'https://arxiv.org/abs/2307.03109',\n",
" 'publication': 'Y Chang, X Wang, J Wang, Y Wu, K Zhu… - '\n",
" 'arXiv preprint arXiv …, 2023 - arxiv.org',\n",
" 'snippet': '… 3.1 Natural Language Processing Tasks … '\n",
" 'the development of language models, '\n",
" 'particularly large language models, was to '\n",
" 'enhance performance on natural language '\n",
" 'processing tasks, …',\n",
" 'inline_links': {'cited_by': {'cites_id': '8717195588046785125',\n",
" 'total': 31,\n",
" 'link': 'https://scholar.google.com/scholar?cites=8717195588046785125&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '8717195588046785125',\n",
" 'total': 3,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=8717195588046785125&hl=en&as_sdt=0,33'},\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:ZYohnzOz-XgJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2307.03109'},\n",
" 'authors': [{'name': 'X Wang',\n",
" 'id': 'Q7Ieos8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=Q7Ieos8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Wang',\n",
" 'id': 'YomxTXQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=YomxTXQAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Y Wu',\n",
" 'id': 'KVeRu2QAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=KVeRu2QAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'K Zhu',\n",
" 'id': 'g75dFLYAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=g75dFLYAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 6,\n",
" 'title': 'Evaluating large language models trained on '\n",
" 'code',\n",
" 'data_cid': '3tNvW3l5nU4J',\n",
" 'link': 'https://arxiv.org/abs/2107.03374',\n",
" 'publication': 'M Chen, J Tworek, H Jun, Q Yuan, HPO '\n",
" 'Pinto… - arXiv preprint arXiv …, 2021 - '\n",
" 'arxiv.org',\n",
" 'snippet': '… We introduce Codex, a GPT language model '\n",
" 'finetuned on publicly available code from '\n",
" 'GitHub, and study its Python code-writing '\n",
" 'capabilities. A distinct production version '\n",
" 'of Codex …',\n",
" 'inline_links': {'cited_by': {'cites_id': '5664817468434011102',\n",
" 'total': 941,\n",
" 'link': 'https://scholar.google.com/scholar?cites=5664817468434011102&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '5664817468434011102',\n",
" 'total': 2,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=5664817468434011102&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:3tNvW3l5nU4J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:3tNvW3l5nU4J:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2107.03374.pdf?trk=public_post_comment-text'},\n",
" 'authors': [{'name': 'M Chen',\n",
" 'id': '5fU-QMwAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=5fU-QMwAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Tworek',\n",
" 'id': 'ZPuESCQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=ZPuESCQAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Q Yuan',\n",
" 'id': 'B059m2EAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=B059m2EAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 7,\n",
" 'title': 'Large language models in machine translation',\n",
" 'data_cid': 'sY5m_Y3-0Y4J',\n",
" 'link': 'http://research.google/pubs/pub33278.pdf',\n",
" 'publication': 'T Brants, AC Popat, P Xu, FJ Och, J Dean '\n",
" '- 2007 - research.google',\n",
" 'snippet': '… the benefits of largescale statistical '\n",
" 'language modeling in ma… trillion tokens, '\n",
" 'resulting in language models having up to '\n",
" '300 … is inexpensive to train on large data '\n",
" 'sets and approaches the …',\n",
" 'type': 'PDF',\n",
" 'inline_links': {'cited_by': {'cites_id': '10291286509313494705',\n",
" 'total': 737,\n",
" 'link': 'https://scholar.google.com/scholar?cites=10291286509313494705&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '10291286509313494705',\n",
" 'total': 31,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=10291286509313494705&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:sY5m_Y3-0Y4J:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:sY5m_Y3-0Y4J:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'research.google',\n",
" 'format': 'PDF',\n",
" 'link': 'http://research.google/pubs/pub33278.pdf'},\n",
" 'authors': [{'name': 'FJ Och',\n",
" 'id': 'ITGdg6oAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=ITGdg6oAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Dean',\n",
" 'id': 'NMS69lQAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=NMS69lQAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 8,\n",
" 'title': 'A watermark for large language models',\n",
" 'data_cid': 'BlSyLHT4iiEJ',\n",
" 'link': 'https://arxiv.org/abs/2301.10226',\n",
" 'publication': 'J Kirchenbauer, J Geiping, Y Wen, J '\n",
" 'Katz… - arXiv preprint arXiv …, 2023 - '\n",
" 'arxiv.org',\n",
" 'snippet': '… To derive this watermark, we examine what '\n",
" 'happens in the language model just before it '\n",
" 'produces a probability vector. The last '\n",
" 'layer of the language model outputs a vector '\n",
" 'of logits l(t). …',\n",
" 'inline_links': {'cited_by': {'cites_id': '2417017327887471622',\n",
" 'total': 104,\n",
" 'link': 'https://scholar.google.com/scholar?cites=2417017327887471622&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '2417017327887471622',\n",
" 'total': 4,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=2417017327887471622&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:BlSyLHT4iiEJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:BlSyLHT4iiEJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'arxiv.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://arxiv.org/pdf/2301.10226.pdf?curius=1419'},\n",
" 'authors': [{'name': 'J Kirchenbauer',\n",
" 'id': '48GJrbsAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=48GJrbsAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Geiping',\n",
" 'id': '206vNCEAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=206vNCEAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'Y Wen',\n",
" 'id': 'oUYfjg0AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=oUYfjg0AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'J Katz',\n",
" 'id': 'yPw4WjoAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=yPw4WjoAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 9,\n",
" 'title': 'ChatGPT and other large language models are '\n",
" 'double-edged swords',\n",
" 'data_cid': 'So0q8TRvxhYJ',\n",
" 'link': 'https://pubs.rsna.org/doi/full/10.1148/radiol.230163',\n",
" 'publication': 'Y Shen, L Heacock, J Elias, KD Hentel, B '\n",
" 'Reig, G Shih… - Radiology, 2023 - '\n",
" 'pubs.rsna.org',\n",
" 'snippet': '… Large Language Models (LLMs) are deep '\n",
" 'learning models trained to understand and '\n",
" 'generate natural language. Recent studies '\n",
" 'demonstrated that LLMs achieve great success '\n",
" 'in a …',\n",
" 'inline_links': {'cited_by': {'cites_id': '1641121387398204746',\n",
" 'total': 231,\n",
" 'link': 'https://scholar.google.com/scholar?cites=1641121387398204746&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '1641121387398204746',\n",
" 'total': 3,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=1641121387398204746&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:So0q8TRvxhYJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'authors': [{'name': 'Y Shen',\n",
" 'id': 'XaeN2zgAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=XaeN2zgAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'L Heacock',\n",
" 'id': 'tYYM5IkAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=tYYM5IkAAAAJ&hl=en&oi=sra'}]},\n",
" 'link': 'https://scholar.google.com/citations?user=I66ZBYwAAAAJ&hl=en&oi=sra'}]},\n",
" {'position': 10,\n",
" 'title': 'Pythia: A suite for analyzing large language '\n",
" 'models across training and scaling',\n",
" 'data_cid': 'aaIDvsMAD8QJ',\n",
" 'link': 'https://proceedings.mlr.press/v202/biderman23a.html',\n",
" 'publication': 'S Biderman, H Schoelkopf… - '\n",
" 'International …, 2023 - '\n",
" 'proceedings.mlr.press',\n",
" 'snippet': '… large language models, we prioritize '\n",
" 'consistency in model … out the most '\n",
" 'performance from each model. For example, we '\n",
" '… models, as it is becoming widely used for '\n",
" 'the largest models, …',\n",
" 'inline_links': {'cited_by': {'cites_id': '14127511396791067241',\n",
" 'total': 89,\n",
" 'link': 'https://scholar.google.com/scholar?cites=14127511396791067241&as_sdt=5,33&sciodt=0,33&hl=en'},\n",
" 'versions': {'cluster_id': '14127511396791067241',\n",
" 'total': 3,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=14127511396791067241&hl=en&as_sdt=0,33'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:aaIDvsMAD8QJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'cached_page_link': 'https://scholar.googleusercontent.com/scholar?q=cache:aaIDvsMAD8QJ:scholar.google.com/+Large+Language+Models&hl=en&as_sdt=0,33'},\n",
" 'resource': {'name': 'mlr.press',\n",
" 'title': 'A systematic evaluation of large language '\n",
" 'models of code',\n",
" 'data_cid': '-iQSW0h72hYJ',\n",
" 'link': 'https://dl.acm.org/doi/abs/10.1145/3520312.3534862',\n",
" 'publication': 'FF Xu, U Alon, G Neubig, VJ Hellendoorn '\n",
" '- Proceedings of the 6th ACM …, 2022 - '\n",
" 'dl.acm.org',\n",
" 'snippet': '… largest language models for code. We also '\n",
" 'release PolyCoder, a large open-source '\n",
" 'language model for code, trained exclusively '\n",
" 'on code in 12 different programming '\n",
" 'languages. In the …',\n",
" 'inline_links': {'cited_by': {'cites_id': '1646764164453115130',\n",
" 'total': 764,\n",
" 'link': 'https://scholar.google.com/scholar?cites=1646764164453115130&as_sdt=2005&sciodt=0,5&hl=en'},\n",
" 'versions': {'cluster_id': '1646764164453115130',\n",
" 'total': 6,\n",
" 'link': 'https://scholar.google.com/scholar?cluster=1646764164453115130&hl=en&as_sdt=0,5'},\n",
" 'related_articles_link': 'https://scholar.google.com/scholar?q=related:-iQSW0h72hYJ:scholar.google.com/&scioq=Large+Language+Models&hl=en&as_sdt=0,5'},\n",
" 'resource': {'name': 'acm.org',\n",
" 'format': 'PDF',\n",
" 'link': 'https://proceedings.mlr.press/v202/biderman23a/biderman23a.pdf'},\n",
" 'authors': [{'name': 'S Biderman',\n",
" 'id': 'bO7H0DAAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=bO7H0DAAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'H Schoelkopf',\n",
" 'id': 'XLahYIYAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=XLahYIYAAAAJ&hl=en&oi=sra'}]}],\n",
" 'related_searches': [{'query': 'large language models machine',\n",
" 'highlighted': ['machine'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=1&q=large+language+models+machine&qst=ib'},\n",
" {'query': 'large language models pruning',\n",
" 'highlighted': ['pruning'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=2&q=large+language+models+pruning&qst=ib'},\n",
" {'query': 'large language models multitask learners',\n",
" 'highlighted': ['multitask learners'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=3&q=large+language+models+multitask+learners&qst=ib'},\n",
" {'query': 'large language models speech recognition',\n",
" 'highlighted': ['speech recognition'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=4&q=large+language+models+speech+recognition&qst=ib'},\n",
" 'link': 'https://dl.acm.org/doi/pdf/10.1145/3520312.3534862'},\n",
" 'authors': [{'name': 'FF Xu',\n",
" 'id': '1hXyfIkAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=1hXyfIkAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'U Alon',\n",
" 'id': 'QBn7vq8AAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=QBn7vq8AAAAJ&hl=en&oi=sra'},\n",
" {'name': 'G Neubig',\n",
" 'id': 'wlosgkoAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=wlosgkoAAAAJ&hl=en&oi=sra'},\n",
" {'name': 'VJ Hellendoorn',\n",
" 'id': 'PfYrc5kAAAAJ',\n",
" 'link': 'https://scholar.google.com/citations?user=PfYrc5kAAAAJ&hl=en&oi=sra'}]}],\n",
" 'related_searches': [{'query': 'emergent large language models',\n",
" 'highlighted': ['emergent'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=1&q=emergent+large+language+models&qst=ib'},\n",
" {'query': 'large language models abilities',\n",
" 'highlighted': ['abilities'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=2&q=large+language+models+abilities&qst=ib'},\n",
" {'query': 'prompt large language models',\n",
" 'highlighted': ['prompt'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=3&q=prompt+large+language+models&qst=ib'},\n",
" {'query': 'large language models training '\n",
" 'compute-optimal',\n",
" 'highlighted': ['training compute-optimal'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=4&q=large+language+models+training+compute-optimal&qst=ib'},\n",
" {'query': 'large language models machine translation',\n",
" 'highlighted': ['machine translation'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=5&q=large+language+models+machine+translation&qst=ib'},\n",
" {'query': 'emergent abilities of large language models',\n",
" 'highlighted': ['emergent abilities of'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=6&q=emergent+abilities+of+large+language+models&qst=ir'},\n",
" {'query': 'language models privacy risks',\n",
" 'highlighted': ['privacy risks'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=7&q=language+models+privacy+risks&qst=ir'},\n",
" {'query': 'language model fine tuning',\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=5&q=large+language+models+machine+translation&qst=ib'},\n",
" {'query': 'large language models zero shot',\n",
" 'highlighted': ['zero shot'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=6&q=large+language+models+zero+shot&qst=ib'},\n",
" {'query': 'large language models chatgpt',\n",
" 'highlighted': ['chatgpt'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=7&q=large+language+models+chatgpt&qst=ib'},\n",
" {'query': 'fine tuning large language models',\n",
" 'highlighted': ['fine tuning'],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,33&qsp=8&q=language+model+fine+tuning&qst=ir'}],\n",
" 'link': 'https://scholar.google.com/scholar?hl=en&as_sdt=0,5&qsp=8&q=fine+tuning+large+language+models&qst=ib'}],\n",
" 'pagination': {'current': 1,\n",
" 'next': 'https://scholar.google.com/scholar?start=10&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" 'other_pages': {'2': 'https://scholar.google.com/scholar?start=10&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '3': 'https://scholar.google.com/scholar?start=20&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '4': 'https://scholar.google.com/scholar?start=30&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '5': 'https://scholar.google.com/scholar?start=40&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '6': 'https://scholar.google.com/scholar?start=50&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '7': 'https://scholar.google.com/scholar?start=60&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '8': 'https://scholar.google.com/scholar?start=70&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '9': 'https://scholar.google.com/scholar?start=80&q=Large+Language+Models&hl=en&as_sdt=0,33',\n",
" '10': 'https://scholar.google.com/scholar?start=90&q=Large+Language+Models&hl=en&as_sdt=0,33'}}}\n"
" 'next': 'https://scholar.google.com/scholar?start=10&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" 'other_pages': {'2': 'https://scholar.google.com/scholar?start=10&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '3': 'https://scholar.google.com/scholar?start=20&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '4': 'https://scholar.google.com/scholar?start=30&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '5': 'https://scholar.google.com/scholar?start=40&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '6': 'https://scholar.google.com/scholar?start=50&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '7': 'https://scholar.google.com/scholar?start=60&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '8': 'https://scholar.google.com/scholar?start=70&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '9': 'https://scholar.google.com/scholar?start=80&q=Large+Language+Models&hl=en&as_sdt=0,5',\n",
" '10': 'https://scholar.google.com/scholar?start=90&q=Large+Language+Models&hl=en&as_sdt=0,5'}}}\n"
]
}
],
@@ -596,6 +646,14 @@
"results = search.results(\"Large Language Models\")\n",
"pprint.pp(results)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11ab5938-e298-471d-96fc-50405ffad35c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -614,7 +672,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.4"
}
},
"nbformat": 4,

@@ -30,11 +30,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet python-steam-api python-decouple"
|
||||
"%pip install --upgrade --quiet python-steam-api python-decouple steamspypi"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -48,15 +56,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"STEAM_KEY\"] = \"xyz\"\n",
|
||||
"os.environ[\"STEAM_ID\"] = \"123\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"abc\""
|
||||
"os.environ[\"STEAM_KEY\"] = \"\"\n",
|
||||
"os.environ[\"STEAM_ID\"] = \"\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -70,63 +78,83 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain_community.agent_toolkits.steam.toolkit import SteamToolkit\n",
|
||||
"from langchain_community.utilities.steam import SteamWebAPIWrapper\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
"\n",
|
||||
"steam = SteamWebAPIWrapper()\n",
|
||||
"tools = [steam.run]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"Steam = SteamWebAPIWrapper()\n",
|
||||
"toolkit = SteamToolkit.from_steam_api_wrapper(Steam)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")"
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
|
||||
]
|
||||
},
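Besides the streaming loop in the next cell, a LangGraph react agent can also be invoked in one shot. A minimal sketch, reusing the same Steam wrapper and model id as the hunk above (the rest is standard LangGraph API):

from langchain_community.utilities.steam import SteamWebAPIWrapper
from langgraph.prebuilt import create_react_agent

steam = SteamWebAPIWrapper()  # needs STEAM_KEY / STEAM_ID set, as above
agent = create_react_agent("openai:gpt-4.1-mini", [steam.run])

# One-shot invocation instead of streaming; the final state holds all messages.
result = agent.invoke(
    {"messages": [("user", "can you give the information about the game Terraria?")]}
)
print(result["messages"][-1].content)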
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"can you give the information about the game Terraria?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" run (call_6vHAXSIL2MPugXxlv5uyf9Xk)\n",
|
||||
" Call ID: call_6vHAXSIL2MPugXxlv5uyf9Xk\n",
|
||||
" Args:\n",
|
||||
" mode: get_games_details\n",
|
||||
" game: Terraria\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: run\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find the game details\n",
|
||||
"Action: Get Games Details\n",
|
||||
"Action Input: Terraria\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe id is: 105600\n",
|
||||
"The id is: [105600]\n",
|
||||
"The link is: https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13\n",
|
||||
"The price is: $9.99\n",
|
||||
"The summary of the game is: Dig, Fight, Explore, Build: The very world is at your fingertips as you fight for survival, fortune, and glory. Will you delve deep into cavernous expanses in search of treasure and raw materials with which to craft ever-evolving gear, machinery, and aesthetics? Perhaps you will choose instead to seek out ever-greater foes to test your mettle in combat? Maybe you will decide to construct your own city to house the host of mysterious allies you may encounter along your travels? In the World of Terraria, the choice is yours!Blending elements of classic action games with the freedom of sandbox-style creativity, Terraria is a unique gaming experience where both the journey and the destination are completely in the player’s control. The Terraria adventure is truly as unique as the players themselves! Are you up for the monumental task of exploring, creating, and defending a world of your own? Key features: Sandbox Play Randomly generated worlds Free Content Updates \n",
|
||||
"The supported languages of the game are: English, French, Italian, German, Spanish - Spain, Polish, Portuguese - Brazil, Russian, Simplified Chinese\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Terraria is a game with an id of 105600, a link of https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13, a price of $9.99, a summary of \"Dig, Fight, Explore, Build: The very world is at your fingertips as you fight for survival, fortune, and glory. Will you delve deep into cavernous expanses in search of treasure and raw materials with which to craft ever-evolving gear, machinery, and aesthetics? Perhaps you will choose instead to seek out ever-greater foes to test your mettle in combat? Maybe you will decide to construct your own city to house the host of mysterious allies you may encounter along your travels? In the World of Terraria, the choice is yours!Blending elements of classic action games with the freedom of sandbox-style creativity, Terraria is a unique gaming experience where both the journey and the destination are completely in the player’s control. The Terraria adventure is truly as unique as the players themselves! Are you up for the monumental task of exploring, creating, and defending a\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"{'input': 'can you give the information about the game Terraria', 'output': 'Terraria is a game with an id of 105600, a link of https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13, a price of $9.99, a summary of \"Dig, Fight, Explore, Build: The very world is at your fingertips as you fight for survival, fortune, and glory. Will you delve deep into cavernous expanses in search of treasure and raw materials with which to craft ever-evolving gear, machinery, and aesthetics? Perhaps you will choose instead to seek out ever-greater foes to test your mettle in combat? Maybe you will decide to construct your own city to house the host of mysterious allies you may encounter along your travels? In the World of Terraria, the choice is yours!Blending elements of classic action games with the freedom of sandbox-style creativity, Terraria is a unique gaming experience where both the journey and the destination are completely in the player’s control. The Terraria adventure is truly as unique as the players themselves! Are you up for the monumental task of exploring, creating, and defending a'}\n"
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"Terraria is a game where you can dig, fight, explore, and build in a world that is totally at your fingertips. The game gives you the freedom to survive, seek fortune, and achieve glory. You can explore cavernous expanses in search of treasure and materials to craft various gear, machinery, and aesthetic items. Alternatively, you can challenge powerful foes or construct your own city to house mysterious allies you may encounter. The game blends classic action elements with sandbox-style creativity, offering a unique experience where your journey and destination are controlled by you.\n",
|
||||
"\n",
|
||||
"Key features of Terraria include sandbox play, randomly generated worlds, and free content updates.\n",
|
||||
"\n",
|
||||
"The game is priced at $9.99 and supports multiple languages including English, French, Italian, German, Spanish (Spain), Polish, Portuguese (Brazil), Russian, and Simplified Chinese.\n",
|
||||
"\n",
|
||||
"You can find more information and purchase it here: [Terraria on Steam](https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"out = agent(\"can you give the information about the game Terraria\")\n",
|
||||
"print(out)"
|
||||
"events = agent.stream(\n",
|
||||
" {\"messages\": [(\"user\", \"can you give the information about the game Terraria?\")]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -145,7 +173,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.12.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
@@ -19,9 +19,9 @@
"## Overview\n",
"\n",
"### Integration details\n",
"| Class | Package | Serializable | [JS support](https://js.langchain.com/docs/integrations/tools/tavily_search) | Package latest |\n",
"| Class | Package | Serializable | [JS support](https://js.langchain.com/docs/integrations/tools/tavily_extract/) | Package latest |\n",
"|:--------------------------------------------------------------|:---------------------------------------------------------------| :---: | :---: | :---: |\n",
"| [TavilyExtract](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ✅ | ❌ |  |\n",
"| [TavilyExtract](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ✅ | ✅ |  |\n",
"\n",
"### Tool features\n",
"| [Returns artifact](/docs/how_to/tool_artifacts/) | Native async | Return data | Pricing |\n",
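The tables above summarize the TavilyExtract integration without showing usage. A minimal sketch, not taken from this diff: the `urls` input field and the `TAVILY_API_KEY` variable follow the langchain-tavily documentation, so verify them against the current release.

import getpass
import os

from langchain_tavily import TavilyExtract

if not os.environ.get("TAVILY_API_KEY"):
    os.environ["TAVILY_API_KEY"] = getpass.getpass("TAVILY_API_KEY:\n")

extract_tool = TavilyExtract()
# Extract page content from one or more URLs (input schema assumed from the docs).
print(extract_tool.invoke({"urls": ["https://docs.tavily.com/"]}))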
@@ -20,7 +20,7 @@
"### Integration details\n",
"| Class | Package | Serializable | [JS support](https://js.langchain.com/docs/integrations/tools/tavily_search) | Package latest |\n",
"|:--------------------------------------------------------------|:---------------------------------------------------------------| :---: | :---: | :---: |\n",
"| [TavilySearch](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ✅ | ❌ |  |\n",
"| [TavilySearch](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ✅ | ✅ |  |\n",
"\n",
"### Tool features\n",
"| [Returns artifact](/docs/how_to/tool_artifacts/) | Native async | Return data | Pricing |\n",
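Likewise for TavilySearch, a companion sketch under the same assumptions; `max_results` is a documented constructor argument, but treat the exact signature as an assumption:

from langchain_tavily import TavilySearch

search_tool = TavilySearch(max_results=5)  # assumes TAVILY_API_KEY is set
print(search_tool.invoke({"query": "Large Language Models"}))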
501
docs/docs/integrations/tools/vectara.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -17,10 +17,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "38717a85-2c3c-4452-a1c7-1ed4dea3da86",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet yfinance"
|
||||
]
|
||||
@@ -35,121 +43,136 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"id": "d137dd6c-d3d3-4813-af65-59eaaa6b3d76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"..\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "fc42f766-9ce6-4ba3-be6c-5ba8a345b0d3",
|
||||
"execution_count": null,
|
||||
"id": "af297977-4fc3-421f-9ce1-f62c1c5b026a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0.0)\n",
|
||||
"tools = [YahooFinanceNewsTool()]\n",
|
||||
"agent_chain = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "3d1614b4-508e-4689-84b1-2a387f80aeb1",
|
||||
"execution_count": 4,
|
||||
"id": "ac3cbec8-4135-4f5a-bb35-299730c000bd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"What happened today with Microsoft stocks?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" yahoo_finance_news (call_s1Waj1rAoJ89CfxWX1RWDiWL)\n",
|
||||
" Call ID: call_s1Waj1rAoJ89CfxWX1RWDiWL\n",
|
||||
" Args:\n",
|
||||
" query: MSFT\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: yahoo_finance_news\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mI should check the latest financial news about Microsoft stocks.\n",
|
||||
"Action: yahoo_finance_news\n",
|
||||
"Action Input: MSFT\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mMicrosoft (MSFT) Gains But Lags Market: What You Should Know\n",
|
||||
"In the latest trading session, Microsoft (MSFT) closed at $328.79, marking a +0.12% move from the previous day.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI have the latest information on Microsoft stocks.\n",
|
||||
"Final Answer: Microsoft (MSFT) closed at $328.79, with a +0.12% move from the previous day.\u001b[0m\n",
|
||||
"Microsoft (MSFT), Meta Platforms (META) Reported “Home Run” Results: Dan Ives’ Recent Comments\n",
|
||||
"Microsoft (MSFT) and Meta Platforms (META) delivered “home run” results yesterday, as the AI Revolution has not been slowed by the Trump administration’s tariffs, Dan Ives, the Managing Director and Senior Equity Research Analyst at Wedbush Securities said on CNBC recently. Ives covers tech stocks. Tech Is Poised for a Comeback, Ives Indicates “The tech […]\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"Today, Microsoft (MSFT) reported strong financial results, described as \"home run\" results by Dan Ives, Managing Director and Senior Equity Research Analyst at Wedbush Securities. Despite the Trump administration’s tariffs, the AI Revolution driving tech stocks like Microsoft has not slowed down, indicating a positive outlook for the company and the tech sector overall.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Microsoft (MSFT) closed at $328.79, with a +0.12% move from the previous day.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_chain.invoke(\n",
|
||||
" \"What happened today with Microsoft stocks?\",\n",
|
||||
")"
|
||||
"input_message = {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"What happened today with Microsoft stocks?\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": [input_message]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "c899b64d-86a5-452c-b576-e94f485c27ea",
|
||||
"execution_count": 5,
|
||||
"id": "4496b06b-8b57-4fa8-9b86-4db407caa807",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"How does Microsoft feels today comparing with Nvidia?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" yahoo_finance_news (call_r9m4YxdEqWeXotkNgK8jGzeJ)\n",
|
||||
" Call ID: call_r9m4YxdEqWeXotkNgK8jGzeJ\n",
|
||||
" Args:\n",
|
||||
" query: MSFT\n",
|
||||
" yahoo_finance_news (call_fxj3AIKPB4MYuquvFFWrBD8B)\n",
|
||||
" Call ID: call_fxj3AIKPB4MYuquvFFWrBD8B\n",
|
||||
" Args:\n",
|
||||
" query: NVDA\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: yahoo_finance_news\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mI should compare the current sentiment of Microsoft and Nvidia.\n",
|
||||
"Action: yahoo_finance_news\n",
|
||||
"Action Input: MSFT\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mMicrosoft (MSFT) Gains But Lags Market: What You Should Know\n",
|
||||
"In the latest trading session, Microsoft (MSFT) closed at $328.79, marking a +0.12% move from the previous day.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to find the current sentiment of Nvidia as well.\n",
|
||||
"Action: yahoo_finance_news\n",
|
||||
"Action Input: NVDA\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI now know the current sentiment of both Microsoft and Nvidia.\n",
|
||||
"Final Answer: I cannot compare the sentiment of Microsoft and Nvidia as I only have information about Microsoft.\u001b[0m\n",
|
||||
"NVIDIA Corporation (NVDA): Among Ken Fisher’s Technology Stock Picks with Huge Upside Potential\n",
|
||||
"We recently published an article titled Billionaire Ken Fisher’s 10 Technology Stock Picks with Huge Upside Potential. In this article, we are going to take a look at where NVIDIA Corporation (NASDAQ:NVDA) stands against the other technology stocks. Technology stocks have faced heightened volatility in 2025, with market sentiment swinging sharply in response to President Donald […]\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"Nvidia (NVDA) Redesigns Chips to Sidestep U.S. Export Ban, Eyes June China Rollout\n",
|
||||
"Nvidia plans China-specific AI chip revamp after new U.S. export limits\n",
|
||||
"\n",
|
||||
"Is NVIDIA (NVDA) the Best NASDAQ Stock to Buy According to Billionaires?\n",
|
||||
"We recently published a list of 10 Best NASDAQ Stocks to Buy According to Billionaires. In this article, we are going to take a look at where NVIDIA Corporation (NASDAQ:NVDA) stands against other best NASDAQ stocks to buy according to billionaires. The latest market data shows that the US economy contracted at an annualized rate […]\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"Today, Microsoft (MSFT) is viewed positively with recent strong earnings reported, described as \"home run\" results, indicating confidence in its performance amid an ongoing AI revolution.\n",
|
||||
"\n",
|
||||
"Nvidia (NVDA) is also in focus with its strategic moves, such as redesigning AI chips to bypass U.S. export bans and targeting a China rollout. It is considered one of the technology stocks with significant upside potential, attracting attention from notable investors.\n",
|
||||
"\n",
|
||||
"In summary, both Microsoft and Nvidia have positive sentiments today, with Microsoft showing strong financial results and Nvidia making strategic advancements in AI technology and market positioning.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'I cannot compare the sentiment of Microsoft and Nvidia as I only have information about Microsoft.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_chain.invoke(\n",
|
||||
" \"How does Microsoft feels today comparing with Nvidia?\",\n",
|
||||
")"
|
||||
"input_message = {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"How does Microsoft feels today comparing with Nvidia?\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": [input_message]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -162,7 +185,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"execution_count": 6,
|
||||
"id": "7879b79c-b5c7-4a5d-8338-edda53ff41a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -172,17 +195,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"execution_count": 7,
|
||||
"id": "ac989456-33bc-4478-874e-98b9cb24d113",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'No news found for company that searched with NVDA ticker.'"
|
||||
"'NVIDIA Corporation (NVDA): Among Ken Fisher’s Technology Stock Picks with Huge Upside Potential\\nWe recently published an article titled Billionaire Ken Fisher’s 10 Technology Stock Picks with Huge Upside Potential. In this article, we are going to take a look at where NVIDIA Corporation (NASDAQ:NVDA) stands against the other technology stocks. Technology stocks have faced heightened volatility in 2025, with market sentiment swinging sharply in response to President Donald […]\\n\\nNvidia (NVDA) Redesigns Chips to Sidestep U.S. Export Ban, Eyes June China Rollout\\nNvidia plans China-specific AI chip revamp after new U.S. export limits\\n\\nIs NVIDIA (NVDA) the Best NASDAQ Stock to Buy According to Billionaires?\\nWe recently published a list of 10 Best NASDAQ Stocks to Buy According to Billionaires. In this article, we are going to take a look at where NVIDIA Corporation (NASDAQ:NVDA) stands against other best NASDAQ stocks to buy according to billionaires. The latest market data shows that the US economy contracted at an annualized rate […]'"
|
||||
]
|
||||
},
|
||||
"execution_count": 38,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -193,7 +216,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 8,
|
||||
"id": "46c697aa-102e-48d4-9834-081671aad40a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -201,11 +224,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Top Research Reports for Apple, Broadcom & Caterpillar\n",
|
||||
"Today's Research Daily features new research reports on 16 major stocks, including Apple Inc. (AAPL), Broadcom Inc. (AVGO) and Caterpillar Inc. (CAT).\n",
|
||||
"\n",
|
||||
"Apple Stock on Pace for Worst Month of the Year\n",
|
||||
"Apple (AAPL) shares are on pace for their worst month of the year, according to Dow Jones Market Data. The stock is down 4.8% so far in August, putting it on pace for its worst month since December 2022, when it fell 12%.\n"
|
||||
"No news found for company that searched with AAPL ticker.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -239,7 +258,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
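The yfinance hunks above move the notebook from the deprecated initialize_agent to a LangGraph agent, but the tool also works standalone, as the later cells show. A short sketch consistent with those cells (the ticker is just an example):

from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool

tool = YahooFinanceNewsTool()
# The tool takes a company ticker as its query and returns recent headlines,
# or a "No news found ..." message when nothing matches.
print(tool.invoke("NVDA"))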
@@ -11,8 +11,8 @@
|
||||
"datasets stored in Aerospike. This new service lives outside of Aerospike and\n",
|
||||
"builds an index to perform those searches.\n",
|
||||
"\n",
|
||||
"This notebook showcases the functionality of the LangChain Aerospike VectorStore\n",
|
||||
"integration.\n",
|
||||
"This notebook showcases the functionality of the [LangChain Aerospike VectorStore\n",
|
||||
"integration](https://github.com/aerospike/langchain-aerospike).\n",
|
||||
"\n",
|
||||
"## Install AVS\n",
|
||||
"\n",
|
||||
@@ -25,11 +25,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"AVS_HOST = \"<avs-ip>\"\n",
|
||||
"AVS_HOST = \"<avs_ip>\"\n",
|
||||
"AVS_PORT = 5000"
|
||||
]
|
||||
},
|
||||
@@ -43,15 +43,25 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "shellscript"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install --upgrade --quiet aerospike-vector-search==3.0.1 langchain-community sentence-transformers langchain"
|
||||
"!pip install --upgrade --quiet aerospike-vector-search==4.2.0 langchain-aerospike langchain-community sentence-transformers langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -65,28 +75,32 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2024-05-10 17:28:17-- https://github.com/aerospike/aerospike-vector-search-examples/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
|
||||
"Resolving github.com (github.com)... 140.82.116.4\n",
|
||||
"Connecting to github.com (github.com)|140.82.116.4|:443... connected.\n",
|
||||
"--2025-05-07 21:06:30-- https://github.com/aerospike/aerospike-vector-search-examples/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
|
||||
"Resolving github.com (github.com)... 140.82.116.3\n",
|
||||
"Connecting to github.com (github.com)|140.82.116.3|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 301 Moved Permanently\n",
|
||||
"Location: https://github.com/aerospike/aerospike-vector/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz [following]\n",
|
||||
"--2025-05-07 21:06:30-- https://github.com/aerospike/aerospike-vector/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
|
||||
"Reusing existing connection to github.com:443.\n",
|
||||
"HTTP request sent, awaiting response... 302 Found\n",
|
||||
"Location: https://raw.githubusercontent.com/aerospike/aerospike-vector-search-examples/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz [following]\n",
|
||||
"--2024-05-10 17:28:17-- https://raw.githubusercontent.com/aerospike/aerospike-vector-search-examples/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
|
||||
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n",
|
||||
"Location: https://raw.githubusercontent.com/aerospike/aerospike-vector/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz [following]\n",
|
||||
"--2025-05-07 21:06:30-- https://raw.githubusercontent.com/aerospike/aerospike-vector/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n",
|
||||
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n",
|
||||
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 11597643 (11M) [application/octet-stream]\n",
|
||||
"Saving to: ‘quotes.csv.tgz’\n",
|
||||
"\n",
|
||||
"quotes.csv.tgz 100%[===================>] 11.06M 1.94MB/s in 6.1s \n",
|
||||
"quotes.csv.tgz 100%[===================>] 11.06M 12.7MB/s in 0.9s \n",
|
||||
"\n",
|
||||
"2024-05-10 17:28:23 (1.81 MB/s) - ‘quotes.csv.tgz’ saved [11597643/11597643]\n",
|
||||
"2025-05-07 21:06:32 (12.7 MB/s) - ‘quotes.csv.tgz’ saved [11597643/11597643]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
@@ -106,7 +120,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -132,14 +146,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content=\"quote: I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.\" metadata={'source': './quotes.csv', 'row': 0, 'author': 'Marilyn Monroe', 'category': 'attributed-no-source, best, life, love, mistakes, out-of-control, truth, worst'}\n"
|
||||
"page_content='quote: I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.' metadata={'source': './quotes.csv', 'row': 0, 'author': 'Marilyn Monroe', 'category': 'attributed-no-source, best, life, love, mistakes, out-of-control, truth, worst'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -158,178 +172,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "60662fc2676a46a2ac48fbf30d9c85fe",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"modules.json: 0%| | 0.00/349 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "319412217d3944488f135c8bf8bca73b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"config_sentence_transformers.json: 0%| | 0.00/116 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "eb020ec2e2f4486294f85c490ef4a387",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"README.md: 0%| | 0.00/10.7k [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "65d248263e4049bea4f6b554640a6aae",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"sentence_bert_config.json: 0%| | 0.00/53.0 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
|
||||
" warnings.warn(\n"
|
||||
"/var/folders/h5/lm2_c1xs3s32kwp11prnpftw0000gp/T/ipykernel_84638/3255399720.py:6: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-huggingface package and should be used instead. To use it run `pip install -U :class:`~langchain-huggingface` and import as `from :class:`~langchain_huggingface import HuggingFaceEmbeddings``.\n",
|
||||
" embedder = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
|
||||
"/Users/dwelch/Desktop/everything/projects/langchain/myfork/langchain/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c6b09a49fbd84c799ea28ace296406e3",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"config.json: 0%| | 0.00/612 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "7e649688c67544d5af6bdd883c47d315",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model.safetensors: 0%| | 0.00/90.9M [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "de447c7e4df1485ead14efae1faf96d6",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"tokenizer_config.json: 0%| | 0.00/350 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "83ad1f289cd04f73aafca01a8e68e63b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "2b612221e29e433cb50a54a6b838f5af",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "1f5f0c29c58642478cd665731728dad0",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"special_tokens_map.json: 0%| | 0.00/112 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "dff1d16a5a6d4d20ac39adb5c9425cf6",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"1_Pooling/config.json: 0%| | 0.00/190 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -352,7 +206,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -364,9 +218,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from aerospike_vector_search import AdminClient, Client, HostPort\n",
|
||||
"from aerospike_vector_search import Client, HostPort\n",
|
||||
"from aerospike_vector_search.types import VectorDistanceMetric\n",
|
||||
"from langchain_community.vectorstores import Aerospike\n",
|
||||
"from langchain_aerospike.vectorstores import Aerospike\n",
|
||||
"\n",
|
||||
"# Here we are using the AVS host and port you configured earlier\n",
|
||||
"seed = HostPort(host=AVS_HOST, port=AVS_PORT)\n",
|
||||
@@ -381,13 +235,10 @@
|
||||
"VECTOR_KEY = \"vector\"\n",
|
||||
"\n",
|
||||
"client = Client(seeds=seed)\n",
|
||||
"admin_client = AdminClient(\n",
|
||||
" seeds=seed,\n",
|
||||
")\n",
|
||||
"index_exists = False\n",
|
||||
"\n",
|
||||
"# Check if the index already exists. If not, create it\n",
|
||||
"for index in admin_client.index_list():\n",
|
||||
"for index in client.index_list():\n",
|
||||
" if index[\"id\"][\"namespace\"] == NAMESPACE and index[\"id\"][\"name\"] == INDEX_NAME:\n",
|
||||
" index_exists = True\n",
|
||||
" print(f\"{INDEX_NAME} already exists. Skipping creation\")\n",
|
||||
@@ -395,7 +246,7 @@
|
||||
"\n",
|
||||
"if not index_exists:\n",
|
||||
" print(f\"{INDEX_NAME} does not exist. Creating index\")\n",
|
||||
" admin_client.index_create(\n",
|
||||
" client.index_create(\n",
|
||||
" namespace=NAMESPACE,\n",
|
||||
" name=INDEX_NAME,\n",
|
||||
" vector_field=VECTOR_KEY,\n",
|
||||
@@ -409,8 +260,6 @@
|
||||
" },\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"admin_client.close()\n",
|
||||
"\n",
|
||||
"docstore = Aerospike.from_documents(\n",
|
||||
" documents,\n",
|
||||
" embedder,\n",
|
||||
@@ -432,7 +281,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -440,31 +289,31 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"~~~~ Document 0 ~~~~\n",
|
||||
"auto-generated id: f53589dd-e3e0-4f55-8214-766ca8dc082f\n",
|
||||
"auto-generated id: 4984b472-8a32-4552-b3eb-f03b31b68031\n",
|
||||
"author: Carl Sagan, Cosmos\n",
|
||||
"quote: The Cosmos is all that is or was or ever will be. Our feeblest contemplations of the Cosmos stir us -- there is a tingling in the spine, a catch in the voice, a faint sensation, as if a distant memory, of falling from a height. We know we are approaching the greatest of mysteries.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 1 ~~~~\n",
|
||||
"auto-generated id: dde3e5d1-30b7-47b4-aab7-e319d14e1810\n",
|
||||
"author: Elizabeth Gilbert\n",
|
||||
"quote: The love that moves the sun and the other stars.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 2 ~~~~\n",
|
||||
"auto-generated id: fd56575b-2091-45e7-91c1-9efff2fe5359\n",
|
||||
"auto-generated id: 486c8d87-8dd7-450d-9008-d7549e680ffb\n",
|
||||
"author: Renee Ahdieh, The Rose & the Dagger\n",
|
||||
"quote: From the stars, to the stars.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 2 ~~~~\n",
|
||||
"auto-generated id: 4b43b309-ce51-498c-b225-5254383b5b4a\n",
|
||||
"author: Elizabeth Gilbert\n",
|
||||
"quote: The love that moves the sun and the other stars.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 3 ~~~~\n",
|
||||
"auto-generated id: 8567ed4e-885b-44a7-b993-e0caf422b3c9\n",
|
||||
"auto-generated id: af784a10-f498-4570-bf81-2ffdca35440e\n",
|
||||
"author: Dante Alighieri, Paradiso\n",
|
||||
"quote: Love, that moves the sun and the other stars\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 4 ~~~~\n",
|
||||
"auto-generated id: f868c25e-c54d-48cd-a5a8-14bf402f9ea8\n",
|
||||
"auto-generated id: b45d5d5e-d818-4206-ae6b-b1d166ea3d43\n",
|
||||
"author: Thich Nhat Hanh, Teachings on Love\n",
|
||||
"quote: Through my love for you, I want to express my love for the whole cosmos, the whole of humanity, and all beings. By living with you, I want to learn to love everyone and all species. If I succeed in loving you, I will be able to love everyone and all species on Earth... This is the real message of love.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
@@ -502,7 +351,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -510,7 +359,7 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"New IDs\n",
|
||||
"['972846bd-87ae-493b-8ba3-a3d023c03948', '8171122e-cbda-4eb7-a711-6625b120893b', '53b54409-ac19-4d90-b518-d7c40bf5ee5d']\n"
|
||||
"['adf8064e-9c0e-46e2-b193-169c36432f4c', 'cf65b5ed-a0f4-491a-86ad-dcacc23c2815', '2ef52efd-d9b7-4077-bc14-defdf0b7dd2f']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -552,7 +401,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -560,25 +409,25 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"~~~~ Document 0 ~~~~\n",
|
||||
"auto-generated id: 67d5b23f-b2d2-4872-80ad-5834ea08aa64\n",
|
||||
"auto-generated id: 91e77b39-a528-40c6-a58a-486ae85f991a\n",
|
||||
"author: John Grogan, Marley and Me: Life and Love With the World's Worst Dog\n",
|
||||
"quote: Such short little lives our pets have to spend with us, and they spend most of it waiting for us to come home each day. It is amazing how much love and laughter they bring into our lives and even how much closer we become with each other because of them.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 1 ~~~~\n",
|
||||
"auto-generated id: a9b28eb0-a21c-45bf-9e60-ab2b80e988d8\n",
|
||||
"auto-generated id: c585b4ec-92b5-4579-948c-0529373abc2a\n",
|
||||
"author: John Grogan, Marley and Me: Life and Love With the World's Worst Dog\n",
|
||||
"quote: Dogs are great. Bad dogs, if you can really call them that, are perhaps the greatest of them all.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 2 ~~~~\n",
|
||||
"auto-generated id: ee7434c8-2551-4651-8a22-58514980fb4a\n",
|
||||
"auto-generated id: 5768b31c-fac4-4af7-84b4-fb11bbfcb590\n",
|
||||
"author: Colleen Houck, Tiger's Curse\n",
|
||||
"quote: He then put both hands on the door on either side of my head and leaned in close, pinning me against it. I trembled like a downy rabbit caught in the clutches of a wolf. The wolf came closer. He bent his head and began nuzzling my cheek. The problem was…I wanted the wolf to devour me.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 3 ~~~~\n",
|
||||
"auto-generated id: 9170804c-a155-473b-ab93-8a561dd48f91\n",
|
||||
"auto-generated id: 94f1b9fb-ad57-4f65-b470-7f49dd6c274c\n",
|
||||
"author: Ray Bradbury\n",
|
||||
"quote: Stuff your eyes with wonder,\" he said, \"live as if you'd drop dead in ten seconds. See the world. It's more fantastic than any dream made or paid for in factories. Ask no guarantees, ask for no security, there never was such an animal. And if there were, it would be related to the great sloth which hangs upside down in a tree all day every day, sleeping its life away. To hell with that,\" he said, \"shake the tree and knock the great sloth down on his ass.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
@@ -607,7 +456,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -615,25 +464,25 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"~~~~ Document 0 ~~~~\n",
|
||||
"auto-generated id: 2c1d6ee1-b742-45ea-bed6-24a1f655c849\n",
|
||||
"auto-generated id: 6d9e67a6-0427-41e6-9e24-050518120d74\n",
|
||||
"author: Roy T. Bennett, The Light in the Heart\n",
|
||||
"quote: Never lose hope. Storms make people stronger and never last forever.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 1 ~~~~\n",
|
||||
"auto-generated id: 5962c2cf-ffb5-4e03-9257-bdd630b5c7e9\n",
|
||||
"auto-generated id: 7d426e59-7935-4bcf-a676-cbe8dd4860e7\n",
|
||||
"author: Roy T. Bennett, The Light in the Heart\n",
|
||||
"quote: Difficulties and adversities viciously force all their might on us and cause us to fall apart, but they are necessary elements of individual growth and reveal our true potential. We have got to endure and overcome them, and move forward. Never lose hope. Storms make people stronger and never last forever.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 2 ~~~~\n",
|
||||
"auto-generated id: 3bbcc4ca-de89-4196-9a46-190a50bf6c47\n",
|
||||
"auto-generated id: 6ec05e48-d162-440d-8819-001d2f3712f9\n",
|
||||
"author: Vincent van Gogh, The Letters of Vincent van Gogh\n",
|
||||
"quote: There is peace even in the storm\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
"\n",
|
||||
"~~~~ Document 3 ~~~~\n",
|
||||
"auto-generated id: 37d8cf02-fc2f-429d-b2b6-260a05286108\n",
|
||||
"auto-generated id: d3c3de59-4da4-4ae6-8f6d-83ed905dd320\n",
|
||||
"author: Edwin Morgan, A Book of Lives\n",
|
||||
"quote: Valentine WeatherKiss me with rain on your eyelashes,come on, let us sway together,under the trees, and to hell with thunder.\n",
|
||||
"~~~~~~~~~~~~~~~~~~~~\n",
|
||||
@@ -665,7 +514,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -684,7 +533,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -698,7 +547,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
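The Aerospike hunks above track an API change: in aerospike-vector-search 4.x the separate AdminClient is gone and the plain Client handles index management. A condensed sketch of the new flow; the namespace and index names are placeholders, and dimensions=384 (the all-MiniLM-L6-v2 embedding size) is an assumption the diff context does not show:

from aerospike_vector_search import Client, HostPort

NAMESPACE = "test"          # placeholder
INDEX_NAME = "quote-index"  # placeholder
VECTOR_KEY = "vector"

client = Client(seeds=HostPort(host="<avs_ip>", port=5000))

# A single client now lists and creates indexes (no AdminClient, no admin_client.close()).
index_exists = any(
    ix["id"]["namespace"] == NAMESPACE and ix["id"]["name"] == INDEX_NAME
    for ix in client.index_list()
)
if not index_exists:
    client.index_create(
        namespace=NAMESPACE,
        name=INDEX_NAME,
        vector_field=VECTOR_KEY,
        dimensions=384,  # assumed: embedding size of all-MiniLM-L6-v2
    )
client.close()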
@@ -7,9 +7,11 @@
|
||||
"source": [
|
||||
"# Astra DB Vector Store\n",
|
||||
"\n",
|
||||
"This page provides a quickstart for using [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as a Vector Store.\n",
|
||||
"This page provides a quickstart for using Astra DB as a Vector Store.\n",
|
||||
"\n",
|
||||
"> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available through an easy-to-use JSON API.\n",
|
||||
"> [DataStax Astra DB](https://docs.datastax.com/en/astra-db-serverless/index.html) is a serverless \n",
|
||||
"> AI-ready database built on `Apache Cassandra®` and made conveniently available \n",
|
||||
"> through an easy-to-use JSON API.\n",
|
||||
"\n",
|
||||
"## Setup"
|
||||
]
|
||||
@@ -19,6 +21,8 @@
|
||||
"id": "dbe7c156-0413-47e3-9237-4769c4248869",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Dependencies\n",
|
||||
"\n",
|
||||
"Use of the integration requires the `langchain-astradb` partner package:"
|
||||
]
|
||||
},
|
||||
@@ -26,10 +30,15 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8d00fcf4-9798-4289-9214-d9734690adfc",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install -qU \"langchain-astradb>=0.3.3\""
|
||||
"!pip install \\\n",
|
||||
" \"langchain>=0.3.23,<0.4\" \\\n",
|
||||
" \"langchain-core>=0.3.52,<0.4\" \\\n",
|
||||
" \"langchain-astradb>=0.6,<0.7\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -41,30 +50,40 @@
|
||||
"\n",
|
||||
"In order to use the AstraDB vector store, you must first head to the [AstraDB website](https://astra.datastax.com), create an account, and then create a new database - the initialization might take a few minutes. \n",
|
||||
"\n",
|
||||
"Once the database has been initialized, you should [create an application token](https://docs.datastax.com/en/astra-db-serverless/administration/manage-application-tokens.html#generate-application-token) and save it for later use. \n",
|
||||
"Once the database has been initialized, retrieve your [connection secrets](https://docs.datastax.com/en/astra-db-serverless/get-started/quickstart.html#create-a-database-and-store-your-credentials), which you'll need momentarily. These are:\n",
|
||||
"- an **`API Endpoint`**, such as `\"https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com/\"`\n",
|
||||
"- and a **`Database Token`**, e.g. `\"AstraCS:aBcD123......\"`\n",
|
||||
"\n",
|
||||
"You will also want to copy the `API Endpoint` from the `Database Details` and store that in the `ASTRA_DB_API_ENDPOINT` variable.\n",
|
||||
"\n",
|
||||
"You may optionally provide a namespace, which you can manage from the `Data Explorer` tab of your database dashboard. If you don't wish to set a namespace, you can leave the `getpass` prompt for `ASTRA_DB_NAMESPACE` empty."
|
||||
"You may optionally provide a **`keyspace`** (called \"namespace\" in the LangChain components), which you can manage from the `Data Explorer` tab of your database dashboard. If you wish, you can leave it empty in the prompt below and fall back to a default keyspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 1,
|
||||
"id": "b7843c22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ASTRA_DB_API_ENDPOINT = https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = ········\n",
|
||||
"(optional) ASTRA_DB_KEYSPACE = \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"ASTRA_DB_API_ENDPOINT = getpass.getpass(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n",
|
||||
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \").strip()\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \").strip()\n",
|
||||
"\n",
|
||||
"desired_namespace = getpass.getpass(\"ASTRA_DB_NAMESPACE = \")\n",
|
||||
"if desired_namespace:\n",
|
||||
" ASTRA_DB_NAMESPACE = desired_namespace\n",
|
||||
"desired_keyspace = input(\"(optional) ASTRA_DB_KEYSPACE = \").strip()\n",
|
||||
"if desired_keyspace:\n",
|
||||
" ASTRA_DB_KEYSPACE = desired_keyspace\n",
|
||||
"else:\n",
|
||||
" ASTRA_DB_NAMESPACE = None"
|
||||
" ASTRA_DB_KEYSPACE = None"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -77,7 +96,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "3cb739c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -93,28 +112,46 @@
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"There are two ways to create an Astra DB vector store, which differ in how the embeddings are computed.\n",
|
||||
"There are various ways to create an Astra DB vector store:\n",
|
||||
"\n",
|
||||
"#### Method 1: Explicit embeddings\n",
|
||||
"\n",
|
||||
"You can separately instantiate a `langchain_core.embeddings.Embeddings` class and pass it to the `AstraDBVectorStore` constructor, just like with most other LangChain vector stores.\n",
|
||||
"\n",
|
||||
"#### Method 2: Integrated embedding computation\n",
|
||||
"#### Method 2: Server-side embeddings ('vectorize')\n",
|
||||
"\n",
|
||||
"Alternatively, you can use the [Vectorize](https://www.datastax.com/blog/simplifying-vector-embedding-generation-with-astra-vectorize) feature of Astra DB and simply specify the name of a supported embedding model when creating the store. The embedding computations are entirely handled within the database. (To proceed with this method, you must have enabled the desired embedding integration for your database, as described [in the docs](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html).)\n",
|
||||
"Alternatively, you can use the [server-side embedding computation](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html) feature of Astra DB ('vectorize') and simply specify an embedding model when creating the server infrastructure for the store. The embedding computations will then be entirely handled within the database in subsequent read and write operations. (To proceed with this method, you must have enabled the desired embedding integration for your database, as described [in the docs](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html).)\n",
|
||||
"\n",
|
||||
"### Explicit Embedding Initialization\n",
|
||||
"#### Method 3: Auto-detect from a pre-existing collection\n",
|
||||
"\n",
|
||||
"Below, we instantiate our vector store using the explicit embedding class:\n",
|
||||
"You may already have a [collection](https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html) in your Astra DB, possibly pre-populated with data through other means (e.g. via the Astra UI or a third-party application), and just want to start querying it within LangChain. In this case, the right approach is to enable the `autodetect_collection` mode in the vector store constructor and let the class figure out the details. (Of course, if your collection has no 'vectorize', you still need to provide an `Embeddings` object).\n",
|
||||
"\n",
|
||||
"#### A note on \"hybrid search\"\n",
|
||||
"\n",
|
||||
"Astra DB vector stores support metadata search in vector searches; furthermore, version 0.6 introduced full support for _hybrid search_ through the [findAndRerank](https://docs.datastax.com/en/astra-db-serverless/api-reference/document-methods/find-and-rerank.html) database primitive: documents are retrieved from both a vector-similarity _and_ a keyword-based (\"lexical\") search, and are then merged through a reranker model. This search strategy, entirely handled on server-side, can boost the accuracy of your results, thus improving the quality of your RAG application. Whenever available, hybrid search is used automatically by the vector store (though you can exert manual control over it if you wish to do so).\n",
|
||||
"\n",
|
||||
"#### Additional information\n",
|
||||
"\n",
|
||||
"The `AstraDBVectorStore` can be configured in many ways; see the [API Reference](https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html) for a full guide covering e.g. asynchronous initialization; non-Astra-DB databases; custom indexing allow-/deny-lists; manual hybrid-search control; and much more."
|
||||
]
|
||||
},
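As a concrete companion to the hybrid-search note above: queries still go through the usual vector-store methods, and metadata filtering uses the standard `filter` argument, with hybrid search applied server-side where available. A small sketch, assuming a populated `vector_store` like the ones built below:

# Plain similarity search; hybrid search, when available, is used automatically.
hits = vector_store.similarity_search("query text", k=3)

# The same search restricted by a metadata filter.
hits_filtered = vector_store.similarity_search(
    "query text", k=3, filter={"source": "tweet"}
)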
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8d7e33e0-f948-47b5-a9c2-6407fdde170e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explicit embedding initialization (method 1)\n",
|
||||
"\n",
|
||||
"Instantiate our vector store using an explicit embedding class:\n",
|
||||
"\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n"
|
||||
"<EmbeddingTabs/>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 3,
|
||||
"id": "d71a1dcb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -128,19 +165,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 4,
|
||||
"id": "0b32730d-176e-414c-9d91-fd3644c54211",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_astradb import AstraDBVectorStore\n",
|
||||
"\n",
|
||||
"vector_store = AstraDBVectorStore(\n",
|
||||
"vector_store_explicit_embeddings = AstraDBVectorStore(\n",
|
||||
" collection_name=\"astra_vector_langchain\",\n",
|
||||
" embedding=embeddings,\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" namespace=ASTRA_DB_NAMESPACE,\n",
|
||||
" namespace=ASTRA_DB_KEYSPACE,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -149,26 +186,26 @@
|
||||
"id": "84a1fe85-a42c-4f15-92e1-f79f1dd43ea2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Integrated Embedding Initialization\n",
|
||||
"### Server-side embedding initialization (\"vectorize\", method 2)\n",
|
||||
"\n",
|
||||
"Here it is assumed that you have\n",
|
||||
"In this example code, it is assumed that you have\n",
|
||||
"\n",
|
||||
"- Enabled the OpenAI integration in your Astra DB organization,\n",
|
||||
"- Added an API Key named `\"OPENAI_API_KEY\"` to the integration, and scoped it to the database you are using.\n",
|
||||
"\n",
|
||||
"For more details on how to do this, please consult the [documentation](https://docs.datastax.com/en/astra-db-serverless/integrations/embedding-providers/openai.html)."
|
||||
"For more details, including instructions to switch provider/model, please consult the [documentation](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"id": "9d18455d-3fa6-4f9e-b687-3a2bc71c9a23",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from astrapy.info import CollectionVectorServiceOptions\n",
|
||||
"from astrapy.info import VectorServiceOptions\n",
|
||||
"\n",
|
||||
"openai_vectorize_options = CollectionVectorServiceOptions(\n",
|
||||
"openai_vectorize_options = VectorServiceOptions(\n",
|
||||
" provider=\"openai\",\n",
|
||||
" model_name=\"text-embedding-3-small\",\n",
|
||||
" authentication={\n",
|
||||
@@ -176,125 +213,183 @@
|
||||
" },\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_store_integrated = AstraDBVectorStore(\n",
|
||||
" collection_name=\"astra_vector_langchain_integrated\",\n",
|
||||
"vector_store_integrated_embeddings = AstraDBVectorStore(\n",
|
||||
" collection_name=\"astra_vectorize_langchain\",\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" namespace=ASTRA_DB_NAMESPACE,\n",
|
||||
" namespace=ASTRA_DB_KEYSPACE,\n",
|
||||
" collection_vector_service_options=openai_vectorize_options,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "24508a60-9591-4b24-a9b7-ecc90ed71b68",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Auto-detect initialization (method 3)\n",
|
||||
"\n",
|
||||
"You can use this pattern if the collection already exists on the database and your `AstraDBVectorStore` needs to use it (for reads and writes). The LangChain component will inspect the collection and figure out the details.\n",
|
||||
"\n",
|
||||
"This is the recommended approach if the collection has been created and -- most importantly -- populated by tools other than LangChain, for example if the data has been ingested through the Astra DB Web interface.\n",
|
||||
"\n",
|
||||
"Auto-detect mode cannot coexist with _collection_ settings (such as the similarity metric and such); on the other hand, if no server-side embeddings are employed, one still needs to pass an `Embeddings` object to the constructor.\n",
|
||||
"\n",
|
||||
"In the following example code, we will \"auto-detect\" the very same collection that was created by method 2 above (\"vectorize\"). Hence, no `Embeddings` object needs to be supplied."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "683b0f6e-884f-4a09-bc3a-454bb1eefd30",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store_autodetected = AstraDBVectorStore(\n",
|
||||
" collection_name=\"astra_vectorize_langchain\",\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" namespace=ASTRA_DB_KEYSPACE,\n",
|
||||
" autodetect_collection=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fbcfe8e8-2f4e-4fc7-a332-7a2fa2c401bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"Once you have created your vector store, interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"All interactions with the vector store proceed regardless of the initialization method: please **adapt the following cell**, if you desire, to select a vector store you have created and want to put to test."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "54d63f59-1e6b-49b4-a7c1-ac7717c92ac0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# If desired, uncomment a different line here:\n",
|
||||
"\n",
|
||||
"# vector_store = vector_store_explicit_embeddings\n",
|
||||
"vector_store = vector_store_integrated_embeddings\n",
|
||||
"# vector_store = vector_store_autodetected"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d3796b39",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"Once you have created your vector store, we can interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"We can add items to our vector store by using the `add_documents` function."
|
||||
"Add documents to the vector store by using the `add_documents` method.\n",
|
||||
"\n",
|
||||
"_The \"id\" field can be supplied separately, in a matching `ids=[...]` parameter to `add_documents`, or even left out entirely to let the store generate IDs._"
|
||||
]
|
||||
},
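As a quick illustration of the note above, here are the two alternative ID styles in sketch form (the document texts and IDs are made up):

```python
from langchain_core.documents import Document

extra_docs = [
    Document(page_content="A first extra text"),
    Document(page_content="A second extra text"),
]

# Style 1: pass IDs separately, matched to documents by position.
vector_store.add_documents(documents=extra_docs, ids=["extra_00", "extra_01"])

# Style 2: omit IDs entirely and let the store generate them.
generated_ids = vector_store.add_documents(documents=extra_docs)
```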
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 8,
|
||||
"id": "afb3e155",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[UUID('89a5cea1-5f3d-47c1-89dc-7e36e12cf4de'),\n",
|
||||
" UUID('d4e78c48-f954-4612-8a38-af22923ba23b'),\n",
|
||||
" UUID('058e4046-ded0-4fc1-b8ac-60e5a5f08ea0'),\n",
|
||||
" UUID('50ab2a9a-762c-4b78-b102-942a86d77288'),\n",
|
||||
" UUID('1da5a3c1-ba51-4f2f-aaaf-79a8f5011ce3'),\n",
|
||||
" UUID('f3055d9e-2eb1-4d25-838e-2c70548f91b5'),\n",
|
||||
" UUID('4bf0613d-08d0-4fbc-a43c-4955e4c9e616'),\n",
|
||||
" UUID('18008625-8fd4-45c2-a0d7-92a2cde23dbc'),\n",
|
||||
" UUID('c712e06f-790b-4fd4-9040-7ab3898965d0'),\n",
|
||||
" UUID('a9b84820-3445-4810-a46c-e77b76ab85bc')]"
|
||||
"['entry_00',\n",
|
||||
" 'entry_01',\n",
|
||||
" 'entry_02',\n",
|
||||
" 'entry_03',\n",
|
||||
" 'entry_04',\n",
|
||||
" 'entry_05',\n",
|
||||
" 'entry_06',\n",
|
||||
" 'entry_07',\n",
|
||||
" 'entry_08',\n",
|
||||
" 'entry_09',\n",
|
||||
" 'entry_10']"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocolate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"documents_to_insert = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"ZYX, just another tool in the world, is actually my agent-based superhero\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=\"entry_00\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"I had chocolate chip pancakes and scrambled eggs \"\n",
|
||||
" \"for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=\"entry_01\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and \"\n",
|
||||
" \"overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
" id=\"entry_02\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain \"\n",
|
||||
" \"- come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=\"entry_03\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole \"\n",
|
||||
" \"$1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
" id=\"entry_04\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Thanks to her sophisticated language skills, the agent \"\n",
|
||||
" \"managed to extract strategic information all right.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=\"entry_05\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this \"\n",
|
||||
" \"review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
" id=\"entry_06\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
" id=\"entry_07\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, \"\n",
|
||||
" \"agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=\"entry_08\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to \"\n",
|
||||
" \"fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
" id=\"entry_09\",\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=\"entry_10\",\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents_to_insert)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -304,12 +399,12 @@
|
||||
"source": [
|
||||
"### Delete items from vector store\n",
|
||||
"\n",
|
||||
"We can delete items from our vector store by ID by using the `delete` function."
|
||||
"Delete items by ID by using the `delete` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 9,
|
||||
"id": "d3f69315",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -319,13 +414,13 @@
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector_store.delete(ids=uuids[-1])"
|
||||
"vector_store.delete(ids=[\"entry_10\", \"entry_02\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -333,20 +428,20 @@
|
||||
"id": "d12e1a07",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"## Query the vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"Once the vector store is created and populated, you can query it (e.g. as part of your chain or agent).\n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"#### Similarity search\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search with filtering on metadata can be done as follows:"
|
||||
"Search for documents similar to a provided text, with additional metadata filters if desired:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 10,
|
||||
"id": "770b3467",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -354,19 +449,20 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
|
||||
"* \"Building an exciting new project with LangChain - come check it out!\", metadata={'source': 'tweet'}\n",
|
||||
"* \"LangGraph is the best framework for building stateful, agentic applications!\", metadata={'source': 'tweet'}\n",
|
||||
"* \"Thanks to her sophisticated language skills, the agent managed to extract strategic information all right.\", metadata={'source': 'tweet'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
" k=2,\n",
|
||||
" k=3,\n",
|
||||
" filter={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
" print(f'* \"{res.page_content}\", metadata={res.metadata}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -376,12 +472,12 @@
|
||||
"source": [
|
||||
"#### Similarity search with score\n",
|
||||
"\n",
|
||||
"You can also search with score:"
|
||||
"You can return the similarity score as well:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 11,
|
||||
"id": "5924309a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -389,16 +485,69 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=0.776585] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
|
||||
"* [SIM=0.71] \"Building an exciting new project with LangChain - come check it out!\", metadata={'source': 'tweet'}\n",
|
||||
"* [SIM=0.70] \"LangGraph is the best framework for building stateful, agentic applications!\", metadata={'source': 'tweet'}\n",
|
||||
"* [SIM=0.61] \"Thanks to her sophisticated language skills, the agent managed to extract strategic information all right.\", metadata={'source': 'tweet'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
" k=3,\n",
|
||||
" filter={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"for res, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
||||
" print(f'* [SIM={score:.2f}] \"{res.page_content}\", metadata={res.metadata}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73b8f418-91a7-46d0-91c3-3c76e9586193",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Specify a different keyword query (requires hybrid search)\n",
|
||||
"\n",
|
||||
"> Note: this cell can be run only if the collection supports the [find-and-rerank](https://docs.datastax.com/en/astra-db-serverless/api-reference/document-methods/find-and-rerank.html) command and if the vector store is aware of this fact.\n",
|
||||
"\n",
|
||||
"If the vector store is using a hybrid-enabled collection and has detected this fact, by default it will use that capability when running searches.\n",
|
||||
"\n",
|
||||
"In that case, the same query text is used for both the vector-similarity and the lexical-based retrieval steps in the find-and-rerank process, _unless you explicitly provide a different query for the latter_:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "e282a48b-081a-4d94-9483-33407e8d6da7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* \"Building an exciting new project with LangChain - come check it out!\", metadata={'source': 'tweet'}\n",
|
||||
"* \"LangGraph is the best framework for building stateful, agentic applications!\", metadata={'source': 'tweet'}\n",
|
||||
"* \"ZYX, just another tool in the world, is actually my agent-based superhero\", metadata={'source': 'tweet'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store_autodetected.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
" k=3,\n",
|
||||
" filter={\"source\": \"tweet\"},\n",
|
||||
" lexical_query=\"agent\",\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f'* \"{res.page_content}\", metadata={res.metadata}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "60688e8c-d74d-4921-b213-b48d88600f95",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"_The above example hardcodes the \"autodetected\" vector store, which has surely inspected the collection and figured out if hybrid is available. Another option is to explicitly supply hybrid-search parameters to the constructor (refer to the API Reference for more details/examples)._"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -408,7 +557,9 @@
|
||||
"source": [
|
||||
"#### Other search methods\n",
|
||||
"\n",
|
||||
"There are a variety of other search methods that are not covered in this notebook, such as MMR search or searching by vector. For a full list of the search abilities available for `AstraDBVectorStore` check out the [API reference](https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html)."
|
||||
"There are a variety of other search methods that are not covered in this notebook, such as MMR search and search by vector.\n",
|
||||
"\n",
|
||||
"For a full list of the search modes available in `AstraDBVectorStore` check out the [API reference](https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html)."
|
||||
]
|
||||
},
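As a taste of one of those methods, here is a sketch of MMR (maximal marginal relevance) search, which is part of the generic `VectorStore` interface; the parameter values are illustrative:

```python
# MMR search trades off relevance against diversity among returned documents.
results = vector_store.max_marginal_relevance_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,         # documents to return
    fetch_k=20,  # candidates fetched before the MMR re-selection
)
for res in results:
    print(f'* "{res.page_content}", metadata={res.metadata}')
```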
|
||||
{
|
||||
@@ -418,24 +569,24 @@
|
||||
"source": [
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. \n",
|
||||
"You can also make the vector store into a retriever, for easier usage in your chains. \n",
|
||||
"\n",
|
||||
"Here is how to transform your vector store into a retriever and then invoke the retreiever with a simple query and filter."
|
||||
"Transform the vector store into a retriever and invoke it with a simple query + metadata filter:"
|
||||
]
|
||||
},
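Since the source of the next cell is elided in this diff, here is a sketch of how the conversion can look; the query and filter echo examples used earlier in this notebook, and the exact cell contents may differ:

```python
# Turn the vector store into a retriever and query it with a metadata filter.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1, "filter": {"source": "news"}},
)
retriever.invoke("Stealing from the bank is a crime")
```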
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 13,
|
||||
"id": "dcee50e6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
"[Document(id='entry_04', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -490,7 +641,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 14,
|
||||
"id": "fd405a13-6f71-46fa-87e6-167238e9c25e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -505,7 +656,7 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `AstraDBVectorStore` features and configurations head to the API reference: https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html"
|
||||
"For detailed documentation of all `AstraDBVectorStore` features and configurations, consult the [API reference](https://python.langchain.com/api_reference/astradb/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -525,7 +676,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because it is too large
docs/docs/integrations/vectorstores/gel.ipynb (new file, 450 lines)
@@ -0,0 +1,450 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7679dd7b-7ed4-4755-a499-824deadba708",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gel \n",
|
||||
"\n",
|
||||
"> An implementation of LangChain vectorstore abstraction using `gel` as the backend.\n",
|
||||
"\n",
|
||||
"[Gel](https://www.geldata.com/) is an open-source PostgreSQL data layer optimized for fast development to production cycle. It comes with a high-level strictly typed graph-like data model, composable hierarchical query language, full SQL support, migrations, Auth and AI modules.\n",
|
||||
"\n",
|
||||
"The code lives in an integration package called [langchain-gel](https://github.com/geldata/langchain-gel).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"First install relevant packages:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "92df32f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install -qU gel langchain-gel "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68ef6ebb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"In order to use Gel as a backend for your `VectorStore`, you're going to need a working Gel instance.\n",
|
||||
"Fortunately, it doesn't have to involve Docker containers or anything complicated, unless you want to!\n",
|
||||
"\n",
|
||||
"To set up a local instance, run:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b79938d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! gel project init --non-interactive"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "08e79230",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you are using [Gel Cloud](https://cloud.geldata.com/) (and you should!), add one more argument to that command:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"gel project init --server-instance <org-name>/<instance-name>\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"For a comprehensive list of ways to run Gel, take a look at [Running Gel](https://docs.geldata.com/reference/running) section of the reference docs.\n",
|
||||
"\n",
|
||||
"### Set up the schema\n",
|
||||
"\n",
|
||||
"[Gel schema](https://docs.geldata.com/reference/datamodel) is an explicit high-level description of your application's data model. \n",
|
||||
"Aside from enabling you to define exactly how your data is going to be laid out, it drives Gel's many powerful features such as links, access policies, functions, triggers, constraints, indexes, and more.\n",
|
||||
"\n",
|
||||
"The LangChain's `VectorStore` expects the following layout for the schema:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "9a7edd58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schema_content = \"\"\"\n",
|
||||
"using extension pgvector;\n",
|
||||
" \n",
|
||||
"module default {\n",
|
||||
" scalar type EmbeddingVector extending ext::pgvector::vector<1536>;\n",
|
||||
"\n",
|
||||
" type Record {\n",
|
||||
" required collection: str;\n",
|
||||
" text: str;\n",
|
||||
" embedding: EmbeddingVector; \n",
|
||||
" external_id: str {\n",
|
||||
" constraint exclusive;\n",
|
||||
" };\n",
|
||||
" metadata: json;\n",
|
||||
"\n",
|
||||
" index ext::pgvector::hnsw_cosine(m := 16, ef_construction := 128)\n",
|
||||
" on (.embedding)\n",
|
||||
" } \n",
|
||||
"}\n",
|
||||
"\"\"\".strip()\n",
|
||||
"\n",
|
||||
"with open(\"dbschema/default.gel\", \"w\") as f:\n",
|
||||
" f.write(schema_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "90320ef1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to apply schema changes to the database, run a migration using Gel's [migration mechanism](https://docs.geldata.com/reference/datamodel/migrations):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cdff483e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! gel migration create --non-interactive\n",
|
||||
"! gel migrate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b2290ef2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"From this point onward, `GelVectorStore` can be used as a drop-in replacement for any other vectorstore available in LangChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ec44dfcc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "94f5c129",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "979a65bd-742f-4b0d-be1e-c0baae245ec6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_gel import GelVectorStore\n",
|
||||
"\n",
|
||||
"vector_store = GelVectorStore(\n",
|
||||
" embeddings=embeddings,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "61a224a1-d70b-4daf-86ba-ab6e43c08b50",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"Note that adding documents by ID will over-write any existing documents that match that ID."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "88a288cc-ffd4-4800-b011-750c72b9fd10",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"there are cats in the pond\",\n",
|
||||
" metadata={\"id\": \"1\", \"location\": \"pond\", \"topic\": \"animals\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"ducks are also found in the pond\",\n",
|
||||
" metadata={\"id\": \"2\", \"location\": \"pond\", \"topic\": \"animals\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"fresh apples are available at the market\",\n",
|
||||
" metadata={\"id\": \"3\", \"location\": \"market\", \"topic\": \"food\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"the market also sells fresh oranges\",\n",
|
||||
" metadata={\"id\": \"4\", \"location\": \"market\", \"topic\": \"food\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"the new art exhibit is fascinating\",\n",
|
||||
" metadata={\"id\": \"5\", \"location\": \"museum\", \"topic\": \"art\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"a sculpture exhibit is also at the museum\",\n",
|
||||
" metadata={\"id\": \"6\", \"location\": \"museum\", \"topic\": \"art\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"a new coffee shop opened on Main Street\",\n",
|
||||
" metadata={\"id\": \"7\", \"location\": \"Main Street\", \"topic\": \"food\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"the book club meets at the library\",\n",
|
||||
" metadata={\"id\": \"8\", \"location\": \"library\", \"topic\": \"reading\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"the library hosts a weekly story time for kids\",\n",
|
||||
" metadata={\"id\": \"9\", \"location\": \"library\", \"topic\": \"reading\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"a cooking class for beginners is offered at the community center\",\n",
|
||||
" metadata={\"id\": \"10\", \"location\": \"community center\", \"topic\": \"classes\"},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(docs, ids=[doc.metadata[\"id\"] for doc in docs])"
|
||||
]
|
||||
},
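A quick round-trip check can confirm the upsert-by-ID behavior described above. This sketch assumes `GelVectorStore` implements the standard interface's optional `get_by_ids` method:

```python
# Fetch two of the documents just added, by the IDs supplied above.
retrieved = vector_store.get_by_ids(["1", "2"])
for doc in retrieved:
    print(f"* {doc.page_content} [{doc.metadata}]")
```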
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c712fa3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a5b2b71f-49eb-407d-b03a-dea4c0a517d6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store.delete(ids=[\"3\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "59f82250-7903-4279-8300-062542c83416",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Filtering Support\n",
|
||||
"\n",
|
||||
"The vectorstore supports a set of filters that can be applied against the metadata fields of the documents.\n",
|
||||
"\n",
|
||||
"| Operator | Meaning/Category |\n",
|
||||
"|----------|-------------------------|\n",
|
||||
"| \\$eq | Equality (==) |\n",
|
||||
"| \\$ne | Inequality (!=) |\n",
|
||||
"| \\$lt | Less than (<) |\n",
|
||||
"| \\$lte | Less than or equal (<=) |\n",
|
||||
"| \\$gt | Greater than (>) |\n",
|
||||
"| \\$gte | Greater than or equal (>=) |\n",
|
||||
"| \\$in | Special Cased (in) |\n",
|
||||
"| \\$nin | Special Cased (not in) |\n",
|
||||
"| \\$between | Special Cased (between) |\n",
|
||||
"| \\$like | Text (like) |\n",
|
||||
"| \\$ilike | Text (case-insensitive like) |\n",
|
||||
"| \\$and | Logical (and) |\n",
|
||||
"| \\$or | Logical (or) |\n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f15a2359-6dc3-4099-8214-785f167a9ca4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"kitty\", k=10, filter={\"id\": {\"$in\": [\"1\", \"5\", \"2\", \"9\"]}}\n",
|
||||
")\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
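A sketch combining two more operators from the table above, `$ne` and `$like`; the `%` wildcard assumes SQL-style LIKE semantics, which is an assumption on our part, and the values are illustrative:

```python
# Exclude one topic and match locations against a LIKE pattern.
results = vector_store.similarity_search(
    "story time",
    k=10,
    filter={
        "topic": {"$ne": "food"},
        "location": {"$like": "%library%"},
    },
)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")
```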
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d92ea049-1b1f-4ae9-9525-35750fe2e52e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you provide a dict with multiple fields, but no operators, the top level will be interpreted as a logical **AND** filter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "88f919e4-e4b0-4b5f-99b3-24c675c26d33",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store.similarity_search(\n",
|
||||
" \"ducks\",\n",
|
||||
" k=10,\n",
|
||||
" filter={\n",
|
||||
" \"id\": {\"$in\": [\"1\", \"5\", \"2\", \"9\"]},\n",
|
||||
" \"location\": {\"$in\": [\"pond\", \"market\"]},\n",
|
||||
" },\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "88f423a4-6575-4fb8-9be2-a3da01106591",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store.similarity_search(\n",
|
||||
" \"ducks\",\n",
|
||||
" k=10,\n",
|
||||
" filter={\n",
|
||||
" \"$and\": [\n",
|
||||
" {\"id\": {\"$in\": [\"1\", \"5\", \"2\", \"9\"]}},\n",
|
||||
" {\"location\": {\"$in\": [\"pond\", \"market\"]}},\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2e65adc1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to execute a similarity search and receive the corresponding scores you can run:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7d92e7b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(query=\"cats\", k=1)\n",
|
||||
"for doc, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8d40db8c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7cd1fb75",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever(search_kwargs={\"k\": 1})\n",
|
||||
"retriever.invoke(\"kitty\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7ecd77a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials](/docs/tutorials/)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/docs/concepts/retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "33a5f0e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all GelVectorStore features and configurations head to the API reference: https://python.langchain.com/api_reference/"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -553,7 +553,10 @@
|
||||
"cell_type": "markdown",
|
||||
"id": "8edb47106e1a46a883d545849b8ab81b",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"\n",
|
||||
@@ -576,6 +579,9 @@
|
||||
"id": "10185d26023b46108eb7d9f57d49d2b3",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
},
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
@@ -603,7 +609,10 @@
|
||||
"cell_type": "markdown",
|
||||
"id": "8763a12b2bbd4a93a75aff182afb95dc",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"> - When you use `BM25BuiltInFunction`, please note that the full-text search is available in Milvus Standalone and Milvus Distributed, but not in Milvus Lite, although it is on the roadmap for future inclusion. It will also be available in Zilliz Cloud (fully-managed Milvus) soon. Please reach out to support@zilliz.com for more information.\n",
|
||||
@@ -617,7 +626,10 @@
|
||||
"cell_type": "markdown",
|
||||
"id": "7623eae2785240b9bd12b16a66d81610",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"### Rerank the candidates\n",
|
||||
@@ -632,6 +644,9 @@
|
||||
"id": "7cdc8c89c7104fffa095e18ddfef8986",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
},
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
@@ -645,14 +660,6 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b3965036",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For more information about Full-text search and Hybrid search, please refer to the [Using Full-Text Search with LangChain and Milvus](https://milvus.io/docs/full_text_search_with_langchain.md) and [Hybrid Retrieval with LangChain and Milvus](https://milvus.io/docs/milvus_hybrid_search_retriever.md)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8ac953f1",
|
||||
@@ -813,7 +820,7 @@
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -827,7 +834,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.0"
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install -qU langchain-pinecone pinecone-notebooks"
|
||||
"pip install -qU langchain langchain-pinecone langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -49,7 +49,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"id": "eb554814",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -57,7 +57,7 @@
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from pinecone import Pinecone, ServerlessSpec\n",
|
||||
"from pinecone import Pinecone\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"PINECONE_API_KEY\"):\n",
|
||||
" os.environ[\"PINECONE_API_KEY\"] = getpass.getpass(\"Enter your Pinecone API key: \")\n",
|
||||
@@ -98,59 +98,41 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 4,
|
||||
"id": "276a06dd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"from pinecone import ServerlessSpec\n",
|
||||
"\n",
|
||||
"index_name = \"langchain-test-index\" # change if desired\n",
|
||||
"\n",
|
||||
"existing_indexes = [index_info[\"name\"] for index_info in pc.list_indexes()]\n",
|
||||
"\n",
|
||||
"if index_name not in existing_indexes:\n",
|
||||
"if not pc.has_index(index_name):\n",
|
||||
" pc.create_index(\n",
|
||||
" name=index_name,\n",
|
||||
" dimension=3072,\n",
|
||||
" dimension=1536,\n",
|
||||
" metric=\"cosine\",\n",
|
||||
" spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
|
||||
" )\n",
|
||||
" while not pc.describe_index(index_name).status[\"ready\"]:\n",
|
||||
" time.sleep(1)\n",
|
||||
"\n",
|
||||
"index = pc.Index(index_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a4d377f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that our Pinecone index is setup, we can initialize our vector store. \n",
|
||||
"\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 5,
|
||||
"id": "1485db56",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
|
||||
]
|
||||
},
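Note that the index dimension of 1536 set above matches OpenAI's `text-embedding-3-small`. Since the vector-store construction itself is elided further down in this diff, a minimal sketch of that step (using the `index` and `embeddings` objects defined above):

```python
# Wrap the freshly created index in a LangChain vector store.
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)
```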
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 6,
|
||||
"id": "6e104aee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -176,30 +158,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": null,
|
||||
"id": "70e688f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['167b8681-5974-467f-adcb-6e987a18df01',\n",
|
||||
" 'd16010fd-41f8-4d49-9c22-c66d5555a3fe',\n",
|
||||
" 'ffcacfb3-2bc2-44c3-a039-c2256a905c0e',\n",
|
||||
" 'cf3bfc9f-5dc7-4f5e-bb41-edb957394126',\n",
|
||||
" 'e99b07eb-fdff-4cb9-baa8-619fd8efeed3',\n",
|
||||
" '68c93033-a24f-40bd-8492-92fa26b631a4',\n",
|
||||
" 'b27a4ecb-b505-4c5d-89ff-526e3d103558',\n",
|
||||
" '4868a9e6-e6fb-4079-b400-4a1dfbf0d4c4',\n",
|
||||
" '921c0e9c-0550-4eb5-9a6c-ed44410788b2',\n",
|
||||
" 'c446fc23-64e8-47e7-8c19-ecf985e9411e']"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
@@ -268,7 +230,6 @@
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
@@ -282,7 +243,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 8,
|
||||
"id": "5b8437cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -306,19 +267,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 9,
|
||||
"id": "ffbcb3fb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
@@ -341,18 +293,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": null,
|
||||
"id": "5fb24583",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=0.553187] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
|
||||
@@ -377,25 +321,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": null,
|
||||
"id": "78140e87",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever(\n",
|
||||
" search_type=\"similarity_score_threshold\",\n",
|
||||
" search_kwargs={\"k\": 1, \"score_threshold\": 0.5},\n",
|
||||
" search_kwargs={\"k\": 1, \"score_threshold\": 0.4},\n",
|
||||
")\n",
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
|
||||
]
|
||||
@@ -421,13 +354,13 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all __ModuleName__VectorStore features and configurations head to the API reference: https://python.langchain.com/api_reference/pinecone/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html"
|
||||
"For detailed documentation of all features and configurations head to the API reference: https://python.langchain.com/api_reference/pinecone/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -441,7 +374,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -234,6 +234,8 @@
|
||||
"_ = vector_store.add_documents(documents=all_splits)\n",
|
||||
"\n",
|
||||
"# Define prompt for question-answering\n",
|
||||
"# N.B. for non-US LangSmith endpoints, you may need to specify\n",
|
||||
"# api_url=\"https://api.smith.langchain.com\" in hub.pull.\n",
|
||||
"prompt = hub.pull(\"rlm/rag-prompt\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -535,7 +537,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"id": "46f378c5-858c-488f-8aef-8b59a6280791",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -553,6 +555,8 @@
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"\n",
|
||||
"# N.B. for non-US LangSmith endpoints, you may need to specify\n",
|
||||
"# api_url=\"https://api.smith.langchain.com\" in hub.pull.\n",
|
||||
"prompt = hub.pull(\"rlm/rag-prompt\")\n",
|
||||
"\n",
|
||||
"example_messages = prompt.invoke(\n",
|
||||
|
||||
@@ -139,7 +139,7 @@ const config = {
|
||||
},
|
||||
announcementBar: {
|
||||
content:
|
||||
'<strong>Join us at <a href="https://interrupt.langchain.com/" target="_blank" rel="noopener noreferrer"> Interrupt: The Agent AI Conference by LangChain</a> on May 13 & 14 in San Francisco!</strong>',
|
||||
'<strong>We are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith. <a href="https://www.langchain.com/careers" target="_blank" rel="noopener noreferrer"> Join our team!</a></strong>',
|
||||
backgroundColor: '#d0c9fe'
|
||||
},
|
||||
prism: {
|
||||
|
||||
@@ -37,6 +37,7 @@ def _reorder_keys(p):
|
||||
"downloads",
|
||||
"downloads_updated_at",
|
||||
"disabled",
|
||||
"include_in_api_ref",
|
||||
]
|
||||
if set(keys) - set(key_order):
|
||||
raise ValueError(f"Unexpected keys: {set(keys) - set(key_order)}")
|
||||
|
||||
@@ -9,8 +9,10 @@ export default function EmbeddingTabs(props) {
|
||||
hideOpenai,
|
||||
azureOpenaiParams,
|
||||
hideAzureOpenai,
|
||||
googleParams,
|
||||
hideGoogle,
|
||||
googleGenAIParams,
|
||||
hideGoogleGenAI,
|
||||
googleVertexAIParams,
|
||||
hideGoogleVertexAI,
|
||||
awsParams,
|
||||
hideAws,
|
||||
huggingFaceParams,
|
||||
@@ -38,7 +40,8 @@ export default function EmbeddingTabs(props) {
|
||||
const azureParamsOrDefault =
|
||||
azureOpenaiParams ??
|
||||
`\n azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],\n azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],\n openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],\n`;
|
||||
const googleParamsOrDefault = googleParams ?? `model="text-embedding-004"`;
|
||||
const googleGenAIParamsOrDefault = googleGenAIParams ?? `model="models/embedding-001"`;
|
||||
const googleVertexAIParamsOrDefault = googleVertexAIParams ?? `model="text-embedding-004"`;
|
||||
const awsParamsOrDefault = awsParams ?? `model_id="amazon.titan-embed-text-v2:0"`;
|
||||
const huggingFaceParamsOrDefault = huggingFaceParams ?? `model_name="sentence-transformers/all-mpnet-base-v2"`;
|
||||
const ollamaParamsOrDefault = ollamaParams ?? `model="llama3"`;
|
||||
@@ -73,13 +76,22 @@ export default function EmbeddingTabs(props) {
|
||||
shouldHide: hideAzureOpenai,
|
||||
},
|
||||
{
|
||||
value: "Google",
|
||||
label: "Google",
|
||||
text: `from langchain_google_vertexai import VertexAIEmbeddings\n\n${embeddingVarName} = VertexAIEmbeddings(${googleParamsOrDefault})`,
|
||||
value: "GoogleGenAI",
|
||||
label: "Google Gemini",
|
||||
text: `from langchain_google_genai import GoogleGenerativeAIEmbeddings\n\n${embeddingVarName} = GoogleGenerativeAIEmbeddings(${googleGenAIParamsOrDefault})`,
|
||||
apiKeyName: "GOOGLE_API_KEY",
|
||||
packageName: "langchain-google-genai",
|
||||
default: false,
|
||||
shouldHide: hideGoogleGenAI,
|
||||
},
|
||||
{
|
||||
value: "GoogleVertexAI",
|
||||
label: "Google Vertex",
|
||||
text: `from langchain_google_vertexai import VertexAIEmbeddings\n\n${embeddingVarName} = VertexAIEmbeddings(${googleVertexAIParamsOrDefault})`,
|
||||
apiKeyName: undefined,
|
||||
packageName: "langchain-google-vertexai",
|
||||
default: false,
|
||||
shouldHide: hideGoogle,
|
||||
shouldHide: hideGoogleVertexAI,
|
||||
},
|
||||
{
|
||||
value: "AWS",
|
||||
|
||||
@@ -461,14 +461,6 @@ const FEATURE_TABLES = {
|
||||
apiLink: "https://python.langchain.com/api_reference/elasticsearch/retrievers/langchain_elasticsearch.retrievers.ElasticsearchRetriever.html",
|
||||
package: "langchain_elasticsearch"
|
||||
},
|
||||
{
|
||||
name: "MilvusCollectionHybridSearchRetriever",
|
||||
link: "milvus_hybrid_search",
|
||||
selfHost: true,
|
||||
cloudOffering: false,
|
||||
apiLink: "https://python.langchain.com/api_reference/milvus/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html",
|
||||
package: "langchain_milvus"
|
||||
},
|
||||
{
|
||||
name: "VertexAISearchRetriever",
|
||||
link: "google_vertex_ai_search",
|
||||
|
||||
@@ -146,6 +146,10 @@
|
||||
"source": "/docs/integrations/retrievers/singlestoredb(/?)",
|
||||
"destination": "https://python.langchain.com/v0.2/docs/integrations/retrievers/singlestoredb/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/integrations/providers/dspy(/?)",
|
||||
"destination": "https://python.langchain.com/v0.2/docs/integrations/providers/dspy/"
|
||||
},
|
||||
{
|
||||
"source": "/api_reference/mongodb/:path(.*/?)*",
|
||||
"destination": "https://langchain-mongodb.readthedocs.io/en/latest/langchain_mongodb/api_docs.html"
|
||||
@@ -153,6 +157,10 @@
|
||||
{
|
||||
"source": "/api_reference/tests/:path(.*/?)*",
|
||||
"destination": "/api_reference/standard_tests/:path"
|
||||
},
|
||||
{
|
||||
"source": "/docs/integrations/retrievers/milvus_hybrid_search(/?)",
|
||||
"destination": "https://python.langchain.com/v0.2/docs/integrations/retrievers/milvus_hybrid_search/"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -2,3 +2,5 @@ httpx
|
||||
grpcio
|
||||
aiohttp<3.11
|
||||
protobuf<3.21
|
||||
tenacity
|
||||
urllib3
|
||||
|
||||
@@ -50,7 +50,7 @@ def migrate(
|
||||
"reflect any imports from new packages. For example, if you see new "
|
||||
"imports from langchain_openai, langchain_anthropic or "
|
||||
"langchain_text_splitters you "
|
||||
"should them to your dependencies! \n\n"
|
||||
"should add them to your dependencies! \n\n"
|
||||
'⚠️ This script is a "best-effort", and is likely to make some '
|
||||
"mistakes.\n\n"
|
||||
"🛡️ Backup your code prior to running the migration script -- it will "
|
||||
|
||||
@@ -30,15 +30,15 @@ if TYPE_CHECKING:
|
||||
from .path import as_import_path, get_relative_path
|
||||
|
||||
__all__ = (
|
||||
"LangChainBetaWarning",
|
||||
"LangChainDeprecationWarning",
|
||||
"as_import_path",
|
||||
"beta",
|
||||
"deprecated",
|
||||
"get_relative_path",
|
||||
"LangChainBetaWarning",
|
||||
"LangChainDeprecationWarning",
|
||||
"suppress_langchain_beta_warning",
|
||||
"surface_langchain_beta_warnings",
|
||||
"suppress_langchain_deprecation_warning",
|
||||
"surface_langchain_beta_warnings",
|
||||
"surface_langchain_deprecation_warnings",
|
||||
"warn_deprecated",
|
||||
)
|
||||
|
||||
@@ -54,39 +54,39 @@ if TYPE_CHECKING:
|
||||
)
|
||||
|
||||
__all__ = (
|
||||
"dispatch_custom_event",
|
||||
"adispatch_custom_event",
|
||||
"RetrieverManagerMixin",
|
||||
"LLMManagerMixin",
|
||||
"ChainManagerMixin",
|
||||
"ToolManagerMixin",
|
||||
"Callbacks",
|
||||
"CallbackManagerMixin",
|
||||
"RunManagerMixin",
|
||||
"BaseCallbackHandler",
|
||||
"AsyncCallbackHandler",
|
||||
"BaseCallbackManager",
|
||||
"BaseRunManager",
|
||||
"RunManager",
|
||||
"ParentRunManager",
|
||||
"AsyncRunManager",
|
||||
"AsyncParentRunManager",
|
||||
"CallbackManagerForLLMRun",
|
||||
"AsyncCallbackManagerForLLMRun",
|
||||
"CallbackManagerForChainRun",
|
||||
"AsyncCallbackManagerForChainRun",
|
||||
"CallbackManagerForToolRun",
|
||||
"AsyncCallbackManagerForToolRun",
|
||||
"CallbackManagerForRetrieverRun",
|
||||
"AsyncCallbackManagerForRetrieverRun",
|
||||
"CallbackManager",
|
||||
"CallbackManagerForChainGroup",
|
||||
"AsyncCallbackManager",
|
||||
"AsyncCallbackManagerForChainGroup",
|
||||
"AsyncCallbackManagerForChainRun",
|
||||
"AsyncCallbackManagerForLLMRun",
|
||||
"AsyncCallbackManagerForRetrieverRun",
|
||||
"AsyncCallbackManagerForToolRun",
|
||||
"AsyncParentRunManager",
|
||||
"AsyncRunManager",
|
||||
"BaseCallbackHandler",
|
||||
"BaseCallbackManager",
|
||||
"BaseRunManager",
|
||||
"CallbackManager",
|
||||
"CallbackManagerForChainGroup",
|
||||
"CallbackManagerForChainRun",
|
||||
"CallbackManagerForLLMRun",
|
||||
"CallbackManagerForRetrieverRun",
|
||||
"CallbackManagerForToolRun",
|
||||
"CallbackManagerMixin",
|
||||
"Callbacks",
|
||||
"ChainManagerMixin",
|
||||
"FileCallbackHandler",
|
||||
"LLMManagerMixin",
|
||||
"ParentRunManager",
|
||||
"RetrieverManagerMixin",
|
||||
"RunManager",
|
||||
"RunManagerMixin",
|
||||
"StdOutCallbackHandler",
|
||||
"StreamingStdOutCallbackHandler",
|
||||
"FileCallbackHandler",
|
||||
"ToolManagerMixin",
|
||||
"UsageMetadataCallbackHandler",
|
||||
"adispatch_custom_event",
|
||||
"dispatch_custom_event",
|
||||
"get_usage_metadata_callback",
|
||||
)
|
||||
|
||||
|
||||
@@ -520,6 +520,8 @@ class RunManager(BaseRunManager):
|
||||
Returns:
|
||||
Any: The result of the callback.
|
||||
"""
|
||||
if not self.handlers:
|
||||
return
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_text",
|
||||
@@ -542,6 +544,8 @@ class RunManager(BaseRunManager):
|
||||
retry_state (RetryCallState): The retry state.
|
||||
**kwargs (Any): Additional keyword arguments.
|
||||
"""
|
||||
if not self.handlers:
|
||||
return
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_retry",
|
||||
@@ -601,6 +605,8 @@ class AsyncRunManager(BaseRunManager, ABC):
|
||||
Returns:
|
||||
Any: The result of the callback.
|
||||
"""
|
||||
if not self.handlers:
|
||||
return
|
||||
await ahandle_event(
|
||||
self.handlers,
|
||||
"on_text",
|
||||
@@ -623,6 +629,8 @@ class AsyncRunManager(BaseRunManager, ABC):
|
||||
retry_state (RetryCallState): The retry state.
|
||||
**kwargs (Any): Additional keyword arguments.
|
||||
"""
|
||||
if not self.handlers:
|
||||
return
|
||||
await ahandle_event(
|
||||
self.handlers,
|
||||
"on_retry",
|
||||
@@ -675,6 +683,8 @@ class CallbackManagerForLLMRun(RunManager, LLMManagerMixin):
|
||||
The chunk. Defaults to None.
|
||||
**kwargs (Any): Additional keyword arguments.
|
||||
"""
|
||||
if not self.handlers:
|
||||
return
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_llm_new_token",
|
||||
@@ -694,6 +704,8 @@ class CallbackManagerForLLMRun(RunManager, LLMManagerMixin):
|
||||
response (LLMResult): The LLM result.
|
||||
**kwargs (Any): Additional keyword arguments.
|
||||
"""
|
||||
if not self.handlers:
|
||||
return
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_llm_end",
|
||||
@@ -718,6 +730,8 @@ class CallbackManagerForLLMRun(RunManager, LLMManagerMixin):
|
||||
- response (LLMResult): The response which was generated before
|
||||
the error occurred.
|
||||
"""
|
||||
if not self.handlers:
|
||||
return
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_llm_error",
|
||||
@@ -750,7 +764,6 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
|
||||
inheritable_metadata=self.inheritable_metadata,
|
||||
)
|
||||
|
||||
@shielded
|
||||
async def on_llm_new_token(
|
||||
self,
|
||||
token: str,
|
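The hunks with a net deletion (for example -750,7 +764,6: seven lines become six) each lose exactly one line, and @shielded is the line that no longer appears, so the most natural reading is that the decorator is being removed from these async callback methods. A decorator of that name conventionally wraps the coroutine in asyncio.shield so that cancelling the outer task does not cancel the callback mid-flight; the sketch below is an assumption about its shape, not necessarily langchain's implementation:

    import asyncio
    from functools import wraps
    from typing import Any, Awaitable, Callable, TypeVar

    T = TypeVar("T")

    def shielded(func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
        """Run the wrapped coroutine inside asyncio.shield so cancellation of
        the caller does not cancel the callback mid-flight.

        A plausible sketch only; the decorator removed in this diff may differ.
        """
        @wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> T:
            return await asyncio.shield(func(*args, **kwargs))
        return wrapper

Shielding is not free: asyncio.shield schedules the inner coroutine as its own task on every call, which is a plausible motivation for dropping it on hot dispatch paths.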
@@ -766,6 +779,8 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
                 The chunk. Defaults to None.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_llm_new_token",
@@ -786,6 +801,8 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
             response (LLMResult): The LLM result.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_llm_end",
@@ -814,6 +831,8 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
 
 
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_llm_error",
@@ -836,6 +855,8 @@ class CallbackManagerForChainRun(ParentRunManager, ChainManagerMixin):
             outputs (Union[dict[str, Any], Any]): The outputs of the chain.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         handle_event(
             self.handlers,
             "on_chain_end",
@@ -858,6 +879,8 @@ class CallbackManagerForChainRun(ParentRunManager, ChainManagerMixin):
             error (Exception or KeyboardInterrupt): The error.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         handle_event(
             self.handlers,
             "on_chain_error",
@@ -879,6 +902,8 @@ class CallbackManagerForChainRun(ParentRunManager, ChainManagerMixin):
         Returns:
             Any: The result of the callback.
         """
+        if not self.handlers:
+            return
         handle_event(
             self.handlers,
             "on_agent_action",
@@ -900,6 +925,8 @@ class CallbackManagerForChainRun(ParentRunManager, ChainManagerMixin):
         Returns:
             Any: The result of the callback.
         """
+        if not self.handlers:
+            return
         handle_event(
             self.handlers,
             "on_agent_finish",
@@ -942,6 +969,8 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
             outputs (Union[dict[str, Any], Any]): The outputs of the chain.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_chain_end",
@@ -965,6 +994,8 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
             error (Exception or KeyboardInterrupt): The error.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_chain_error",
@@ -976,7 +1007,6 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
             **kwargs,
         )
 
-    @shielded
     async def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
         """Run when agent action is received.
 
@@ -987,6 +1017,8 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
         Returns:
             Any: The result of the callback.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_agent_action",
@@ -998,7 +1030,6 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
             **kwargs,
         )
 
-    @shielded
     async def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
         """Run when agent finish is received.
 
@@ -1009,6 +1040,8 @@ class AsyncCallbackManagerForChainRun(AsyncParentRunManager, ChainManagerMixin):
         Returns:
             Any: The result of the callback.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_agent_finish",
@@ -1035,6 +1068,8 @@ class CallbackManagerForToolRun(ParentRunManager, ToolManagerMixin):
             output (Any): The output of the tool.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         handle_event(
             self.handlers,
             "on_tool_end",
@@ -1057,6 +1092,8 @@ class CallbackManagerForToolRun(ParentRunManager, ToolManagerMixin):
             error (Exception or KeyboardInterrupt): The error.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         handle_event(
             self.handlers,
             "on_tool_error",
@@ -1089,7 +1126,6 @@ class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin):
             inheritable_metadata=self.inheritable_metadata,
         )
 
-    @shielded
     async def on_tool_end(self, output: Any, **kwargs: Any) -> None:
         """Async run when the tool ends running.
 
@@ -1097,6 +1133,8 @@ class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin):
             output (Any): The output of the tool.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_tool_end",
@@ -1108,7 +1146,6 @@ class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin):
             **kwargs,
         )
 
-    @shielded
    async def on_tool_error(
         self,
         error: BaseException,
@@ -1120,6 +1157,8 @@ class AsyncCallbackManagerForToolRun(AsyncParentRunManager, ToolManagerMixin):
             error (Exception or KeyboardInterrupt): The error.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_tool_error",
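The signatures in these hunks (on_tool_end(output, **kwargs), on_tool_error(error, **kwargs)) are the same ones a handler implements on the receiving side of handle_event. A minimal consumer using only BaseCallbackHandler, which is exported above; methods you do not override fall back to the base class no-ops:

    from typing import Any

    from langchain_core.callbacks import BaseCallbackHandler

    class ToolLogger(BaseCallbackHandler):
        """Log tool outcomes; all other callbacks remain base-class no-ops."""

        def on_tool_end(self, output: Any, **kwargs: Any) -> None:
            print(f"tool finished: {output!r}")

        def on_tool_error(self, error: BaseException, **kwargs: Any) -> None:
            print(f"tool failed: {error}")

A handler like this is typically attached per call, e.g. tool.invoke(args, config={"callbacks": [ToolLogger()]}).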
@@ -1146,6 +1185,8 @@ class CallbackManagerForRetrieverRun(ParentRunManager, RetrieverManagerMixin):
             documents (Sequence[Document]): The retrieved documents.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         handle_event(
             self.handlers,
             "on_retriever_end",
@@ -1168,6 +1209,8 @@ class CallbackManagerForRetrieverRun(ParentRunManager, RetrieverManagerMixin):
             error (BaseException): The error.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         handle_event(
             self.handlers,
             "on_retriever_error",
@@ -1213,6 +1256,8 @@ class AsyncCallbackManagerForRetrieverRun(
             documents (Sequence[Document]): The retrieved documents.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_retriever_end",
@@ -1236,6 +1281,8 @@ class AsyncCallbackManagerForRetrieverRun(
             error (BaseException): The error.
             **kwargs (Any): Additional keyword arguments.
         """
+        if not self.handlers:
+            return
         await ahandle_event(
             self.handlers,
             "on_retriever_error",
@@ -1521,6 +1568,8 @@ class CallbackManager(BaseCallbackManager):
 
         .. versionadded:: 0.2.14
         """
+        if not self.handlers:
+            return
         if kwargs:
             msg = (
                 "The dispatcher API does not accept additional keyword arguments."
@@ -1998,6 +2047,8 @@ class AsyncCallbackManager(BaseCallbackManager):
 
         .. versionadded:: 0.2.14
         """
+        if not self.handlers:
+            return
         if run_id is None:
             run_id = uuid.uuid4()
 
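The final two hunks guard what appear to be the custom-event dispatch methods: the .. versionadded:: 0.2.14 note and the "dispatcher API" error message both point at on_custom_event. A consumer-side sketch, assuming langchain-core >= 0.2.14 and Python 3.11 or newer (where the run's configuration reaches the dispatcher automatically through contextvars; on older versions the config has to be threaded through explicitly):

    import asyncio

    from langchain_core.callbacks import BaseCallbackHandler, adispatch_custom_event
    from langchain_core.runnables import RunnableLambda

    class ProgressHandler(BaseCallbackHandler):
        def on_custom_event(self, name, data, **kwargs):
            print(f"{name}: {data}")

    async def step(x: int) -> int:
        # A no-op unless a handler is attached to the current run: exactly
        # the case the new `if not self.handlers: return` guard short-circuits.
        await adispatch_custom_event("progress", {"value": x})
        return x + 1

    async def main() -> None:
        await RunnableLambda(step).ainvoke(
            1, config={"callbacks": [ProgressHandler()]}
        )

    asyncio.run(main())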
@@ -14,8 +14,8 @@ __all__ = (
     "BaseLoader",
     "Blob",
     "BlobLoader",
-    "PathLike",
     "LangSmithLoader",
+    "PathLike",
 )
 
 _dynamic_imports = {
@@ -50,7 +50,7 @@ class LangSmithLoader(BaseLoader):
         offset: int = 0,
         limit: Optional[int] = None,
         metadata: Optional[dict] = None,
-        filter: Optional[str] = None,
+        filter: Optional[str] = None,  # noqa: A002
         content_key: str = "",
         format_content: Optional[Callable[..., str]] = None,
         client: Optional[LangSmithClient] = None,
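A002 is Ruff's builtin-argument-shadowing rule: the parameter name filter shadows the built-in filter(). The noqa keeps the public keyword argument stable rather than renaming it. A small illustration of what the rule objects to; search is a hypothetical function, not part of the loader:

    from typing import Optional

    def search(filter: Optional[str] = None) -> list[str]:  # noqa: A002
        """Return items containing `filter`, or everything when it is None."""
        items = ["a", "ab", "abc"]
        if filter is None:
            return items
        # Within this scope `filter` is the parameter; the builtin filter()
        # is shadowed, which is exactly what Ruff's A002 warns about.
        return [item for item in items if filter in item]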
@@ -14,7 +14,7 @@ if TYPE_CHECKING:
     from .compressor import BaseDocumentCompressor
     from .transformers import BaseDocumentTransformer
 
-__all__ = ("Document", "BaseDocumentTransformer", "BaseDocumentCompressor")
+__all__ = ("BaseDocumentCompressor", "BaseDocumentTransformer", "Document")
 
 _dynamic_imports = {
     "Document": "base",
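Several hunks in this compare do nothing but alphabetize __all__, which suggests a lint rule such as Ruff's RUF022 (unsorted-dunder-all) being enforced; that attribution is an inference from the diff, not stated in it. The property is easy to assert in a test:

    import langchain_core.documents as documents

    # Case-sensitive sort order, as in this diff:
    # "BaseDocumentCompressor" < "BaseDocumentTransformer" < "Document".
    assert list(documents.__all__) == sorted(documents.__all__)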
Some files were not shown because too many files have changed in this diff.