mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-17 12:04:45 +00:00)

Compare commits: 55 commits (cc/pinecon ... langchain-)
Commits compared (55, SHA1 only): 184ea8aeb2, ac52021097, c616b445f2, 628145b172, 97a5bc7fc7, f0226135e5, 36ff83a0b5, d5360b9bd6, 595297e2e5, 75663f2cae, 667d2a57fd, c6388d736b, 4bc6cb759f, 5eb4dc5e06, 1051fa5729, eaf2fb287f, 1a38948ee3, 288613d361, 4867fe7ac8, bea5798b04, d1cf10373b, 4278046329, 05554265b4, 21eb39dff0, 44b41b699c, b05543c69b, 30badd7a32, 76172511fd, efadad6067, 4ab04ad6be, d9b856abad, c55af44711, cdf3a17e55, 1bf6576709, e156b372fb, 335ca3a606, 4c0217681a, e64bfb537f, 689592f9bb, 0e3115330d, e4ceafa1c8, 8ef7f3eacc, c115c09b6d, 349b5c91c2, f980144e9c, bbc3e3b2cf, 62074bac60, 5c2fbb5b86, 0a54aedb85, 8de8519daf, 7d3fb21807, 7594ad694f, b1d3e25eb6, 4819b500e8, 46fd09ffeb
**.github/scripts/prep_api_docs_build.py** (vendored, 24 lines changed)
```diff
@@ -64,19 +64,29 @@ def main():
     try:
         # Load packages configuration
         package_yaml = load_packages_yaml()

-        # Clean target directories
-        clean_target_directories([
+        packages = [
             p
             for p in package_yaml["packages"]
-            if p["repo"].startswith("langchain-ai/")
+            if not p.get("disabled", False)
+            and p["repo"].startswith("langchain-ai/")
             and p["repo"] != "langchain-ai/langchain"
-        ])
+        ]

-        # Move libraries to their new locations
-        move_libraries([
-            p
-            for p in package_yaml["packages"]
-            if not p.get("disabled", False)
-            and p["repo"].startswith("langchain-ai/")
-            and p["repo"] != "langchain-ai/langchain"
-        ])
+        # Clean target directories
+        clean_target_directories(packages)
+
+        # Move libraries to their new locations
+        move_libraries(packages)
+
+        # Delete ones without a pyproject.toml
+        for partner in Path("langchain/libs/partners").iterdir():
+            if partner.is_dir() and not (partner / "pyproject.toml").exists():
+                print(f"Removing {partner} as it does not have a pyproject.toml")
+                shutil.rmtree(partner)

         print("Library sync completed successfully!")
```
**.github/workflows/api_doc_build.yml** (vendored, 2 lines changed)
```diff
@@ -72,7 +72,7 @@ jobs:
       - name: Install dependencies
         working-directory: langchain
         run: |
-          python -m uv pip install $(ls ./libs/partners | xargs -I {} echo "./libs/partners/{}")
+          python -m uv pip install $(ls ./libs/partners | xargs -I {} echo "./libs/partners/{}") --overrides ./docs/vercel_overrides.txt
           python -m uv pip install libs/core libs/langchain libs/text-splitters libs/community libs/experimental libs/standard-tests
           python -m uv pip install -r docs/api_reference/requirements.txt
```
```diff
@@ -31,8 +31,8 @@
 "source": [
 "# Optional\n",
 "import os\n",
-"# os.environ['LANGCHAIN_TRACING_V2'] = 'true' # enables tracing\n",
-"# os.environ['LANGCHAIN_API_KEY'] = <your-api-key>"
+"# os.environ['LANGSMITH_TRACING'] = 'true' # enables tracing\n",
+"# os.environ['LANGSMITH_API_KEY'] = <your-api-key>"
 ]
 },
 {
```
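This hunk, and the many like it below, migrate the docs from the legacy `LANGCHAIN_*` tracing variables to the `LANGSMITH_*` names. A minimal sketch of the setup the notebooks converge on, assembled from the renamed lines in these diffs (prompting for the key only when it is unset):

```python
import getpass
import os

# New-style LangSmith variable names (replacing LANGCHAIN_TRACING_V2 / LANGCHAIN_API_KEY)
os.environ["LANGSMITH_TRACING"] = "true"
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("LangSmith API key: ")
```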
```diff
@@ -86,15 +86,15 @@
 "\n",
 "Environment Variables:\n",
 "- USER_AGENT: Specifies the user agent string to be used.\n",
-"- LANGCHAIN_TRACING_V2: Enables or disables tracing for LangChain.\n",
-"- LANGCHAIN_API_KEY: API key for accessing LangChain services.\n",
+"- LANGSMITH_TRACING: Enables or disables tracing for LangChain.\n",
+"- LANGSMITH_API_KEY: API key for accessing LangChain services.\n",
 "- TAVILY_API_KEY: API key for accessing Tavily services.\n",
 "\"\"\"\n",
 "import os\n",
 "\n",
 "os.environ[\"USER_AGENT\"] = \"myagent\"\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = \"xxxx\"\n",
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = \"xxxx\"\n",
 "os.environ[\"TAVILY_API_KEY\"] = \"tvly-xxxx\""
 ]
 },
```
```diff
@@ -124,8 +124,8 @@
 "# Optional-- If you want to enable Langsmith -- good for debugging\n",
 "import os\n",
 "\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -156,7 +156,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# Ensure you have an HF_TOKEN in your development enviornment:\n",
+"# Ensure you have an HF_TOKEN in your development environment:\n",
 "# access tokens can be created or copied from the Hugging Face platform (https://huggingface.co/docs/hub/en/security-tokens)\n",
 "\n",
 "# Load MongoDB's embedded_movies dataset from Hugging Face\n",
```
```diff
@@ -71,9 +71,9 @@
 "# Optional: LangSmith API keys\n",
 "import os\n",
 "\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = \"api_key\""
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = \"api_key\""
 ]
 },
 {
```
```diff
@@ -29,7 +29,7 @@
 "source": [
 "import os\n",
 "\n",
-"os.environ[\"LANGCHAIN_PROJECT\"] = \"movie-qa\""
+"os.environ[\"LANGSMITH_PROJECT\"] = \"movie-qa\""
 ]
 },
 {
```
```diff
@@ -144,8 +144,8 @@
 "outputs": [],
 "source": [
 "# import os\n",
-"# os.environ[\"LANGCHAIN_HANDLER\"] = \"langchain\"\n",
-"# os.environ[\"LANGCHAIN_SESSION\"] = \"default\" # Make sure this session actually exists."
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_PROJECT\"] = \"default\" # Make sure this session actually exists."
 ]
 },
 {
```
```diff
@@ -27,7 +27,7 @@ install-py-deps:
 	$(PYTHON) -m pip install -q --upgrade pip
 	$(PYTHON) -m pip install -q --upgrade uv
 	$(PYTHON) -m uv pip install -q --pre -r vercel_requirements.txt
-	$(PYTHON) -m uv pip install -q --pre $$($(PYTHON) scripts/partner_deps_list.py)
+	$(PYTHON) -m uv pip install -q --pre $$($(PYTHON) scripts/partner_deps_list.py) --overrides vercel_overrides.txt

 generate-files:
 	mkdir -p $(INTERMEDIATE_DIR)
```
````diff
@@ -90,7 +90,7 @@ LangChain has retrievers for many popular lexical search algorithms / engines.
 ### Vector store

 [Vector stores](/docs/concepts/vectorstores/) are a powerful and efficient way to index and retrieve unstructured data.
-An vectorstore can be used as a retriever by calling the `as_retriever()` method.
+A vectorstore can be used as a retriever by calling the `as_retriever()` method.

 ```python
 vectorstore = MyVectorStore()
````
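To make the `as_retriever()` sentence concrete: a minimal runnable sketch, assuming an OpenAI key is configured and substituting the in-memory store from `langchain-core` for the doc's `MyVectorStore` placeholder:

```python
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings  # assumes OPENAI_API_KEY is set

vectorstore = InMemoryVectorStore(embedding=OpenAIEmbeddings())
vectorstore.add_texts(["LangChain supports many retrievers."])

# Any vector store becomes a retriever via as_retriever()
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
docs = retriever.invoke("which retrievers are supported?")
```

Any `VectorStore` subclass exposes the same `as_retriever()` method, so the wrapper works identically across backends.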
```diff
@@ -151,10 +151,10 @@ Many vectorstores support [the `k`](/docs/integrations/vectorstores/pinecone/#qu
 ### Metadata filtering

 While vectorstore implement a search algorithm to efficiently search over *all* the embedded documents to find the most similar ones, many also support filtering on metadata.
-This allows structured filters to reduce the size of the similarity search space. These two concepts work well together:
+Metadata filtering helps narrow down the search by applying specific conditions such as retrieving documents from a particular source or date range. These two concepts work well together:

-1. **Semantic search**: Query the unstructured data directly, often using via embedding or keyword similarity.
-2. **Metadata search**: Apply structured query to the metadata, filering specific documents.
+1. **Semantic search**: Query the unstructured data directly, often via embedding or keyword similarity.
+2. **Metadata search**: Apply structured query to the metadata, filtering specific documents.

 Vector store support for metadata filtering is typically dependent on the underlying vector store implementation.
```
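A hedged sketch of the two concepts working together, again using the in-memory store (its `similarity_search` accepts a `filter` callable over documents; most hosted stores take a store-specific filter dict instead):

```python
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings  # assumes OPENAI_API_KEY is set

store = InMemoryVectorStore(embedding=OpenAIEmbeddings())
store.add_documents([
    Document(page_content="Q3 earnings grew 12%", metadata={"source": "news"}),
    Document(page_content="Q3 earnings call transcript", metadata={"source": "tweet"}),
])

# Semantic search constrained by a structured metadata filter
results = store.similarity_search(
    "quarterly earnings",
    k=1,
    filter=lambda doc: doc.metadata["source"] == "news",
)
```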
```diff
@@ -75,8 +75,8 @@
 "After you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
 "\n",
 "```shell\n",
-"export LANGCHAIN_TRACING_V2=\"true\"\n",
-"export LANGCHAIN_API_KEY=\"...\"\n",
+"export LANGSMITH_TRACING=\"true\"\n",
+"export LANGSMITH_API_KEY=\"...\"\n",
 "```\n",
 "\n",
 "Or, if in a notebook, you can set them with:\n",
```
```diff
@@ -85,8 +85,8 @@
 "import getpass\n",
 "import os\n",
 "\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
 "```\n"
 ]
 },
```
```diff
@@ -33,8 +33,8 @@
 "After you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
 "\n",
 "```shell\n",
-"export LANGCHAIN_TRACING_V2=\"true\"\n",
-"export LANGCHAIN_API_KEY=\"...\"\n",
+"export LANGSMITH_TRACING=\"true\"\n",
+"export LANGSMITH_API_KEY=\"...\"\n",
 "```\n",
 "\n",
 "Or, if in a notebook, you can set them with:\n",
```
```diff
@@ -43,8 +43,8 @@
 "import getpass\n",
 "import os\n",
 "\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
 "```\n",
 "\n",
 "Let's suppose we have an agent, and want to visualize the actions it takes and tool outputs it receives. Without any debugging, here's what we see:\n",
```
```diff
@@ -16,7 +16,7 @@
 "- Basic usage;\n",
 "- Parsing of Markdown into elements such as titles, list items, and text.\n",
 "\n",
-"LangChain implements an [UnstructuredMarkdownLoader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html) object which requires the [Unstructured](https://unstructured-io.github.io/unstructured/) package. First we install it:"
+"LangChain implements an [UnstructuredMarkdownLoader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html) object which requires the [Unstructured](https://docs.unstructured.io/welcome/) package. First we install it:"
 ]
 },
 {
```
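For context on the loader this hunk's prose describes, a short sketch (`example.md` is a placeholder path; `mode="elements"` produces the per-element split into titles, list items, and text mentioned in the guide):

```python
# Requires: pip install langchain-community "unstructured[md]"
from langchain_community.document_loaders import UnstructuredMarkdownLoader

loader = UnstructuredMarkdownLoader("example.md", mode="elements")
docs = loader.load()
for doc in docs[:3]:
    # Each element carries its type (Title, ListItem, ...) in metadata
    print(doc.metadata.get("category"), "->", doc.page_content[:60])
```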
```diff
@@ -82,8 +82,8 @@
 "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
 "\n",
 "# Uncomment the below to use LangSmith. Not required.\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\""
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
 ]
 },
 {
```
```diff
@@ -68,8 +68,8 @@
 "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
 "\n",
 "# Uncomment the below to use LangSmith. Not required.\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\""
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
 ]
 },
 {
```
```diff
@@ -138,7 +138,7 @@
 "\n",
 "## Next steps\n",
 "\n",
-"You've now learned how to prompt a model to return XML. Next, check out the [broader guide on obtaining structured output](/docs/how_to/structured_output) for other related techniques."
+"You've now learned how to prompt a model to return YAML. Next, check out the [broader guide on obtaining structured output](/docs/how_to/structured_output) for other related techniques."
 ]
 },
 {
```
```diff
@@ -71,9 +71,9 @@
 "Note that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
 "\n",
 "```python\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"if not os.environ.get(\"LANGCHAIN_API_KEY\"):\n",
-"    os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"if not os.environ.get(\"LANGSMITH_API_KEY\"):\n",
+"    os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
 "```\n",
 "\n",
 "### Components\n",
```
```diff
@@ -53,8 +53,8 @@
 "Note that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
 "\n",
 "```python\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
 "```"
 ]
 },
```
```diff
@@ -46,8 +46,8 @@
 "Note that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
 "\n",
 "```python\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
 "```"
 ]
 },
```
```diff
@@ -79,8 +79,8 @@
 "    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
 "\n",
 "# Optional, uncomment to trace runs with LangSmith. Sign up here: https://smith.langchain.com.\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -80,8 +80,8 @@
 "    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
 "\n",
 "# Optional, uncomment to trace runs with LangSmith. Sign up here: https://smith.langchain.com.\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -85,8 +85,8 @@
 "    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
 "\n",
 "# Optional, uncomment to trace runs with LangSmith. Sign up here: https://smith.langchain.com.\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -85,8 +85,8 @@
 "    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
 "\n",
 "# Optional, uncomment to trace runs with LangSmith. Sign up here: https://smith.langchain.com.\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -87,8 +87,8 @@
 "    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
 "\n",
 "# Optional, uncomment to trace runs with LangSmith. Sign up here: https://smith.langchain.com.\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -55,8 +55,8 @@
 "source": [
 "# Using LangSmith is recommended but not required. Uncomment below lines to use.\n",
 "# import os\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -39,8 +39,8 @@
 "source": [
 "# Uncomment the below to use LangSmith. Not required.\n",
 "# import os\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\""
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
 ]
 },
 {
```
```diff
@@ -36,8 +36,8 @@
 "source": [
 "# Uncomment the below to use LangSmith. Not required.\n",
 "# import os\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\""
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
 ]
 },
 {
```
```diff
@@ -38,8 +38,8 @@
 "source": [
 "# Uncomment the below to use LangSmith. Not required.\n",
 "# import os\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\""
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
 ]
 },
 {
```
```diff
@@ -16,7 +16,7 @@
 "\n",
 ":::\n",
 "\n",
-"Some models are capable of [**tool calling**](/docs/concepts/tool_calling) - generating arguments that conform to a specific user-provided schema. This guide will demonstrate how to use those tool cals to actually call a function and properly pass the results back to the model.\n",
+"Some models are capable of [**tool calling**](/docs/concepts/tool_calling) - generating arguments that conform to a specific user-provided schema. This guide will demonstrate how to use those tool calls to actually call a function and properly pass the results back to the model.\n",
 "\n",
 "\n",
 "\n",
```
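The corrected sentence describes the full tool-calling round trip: the model emits structured arguments, the caller executes the function, and the result goes back as a tool message. A compact sketch of that flow (the model and tool here are illustrative choices, not taken from the diff):

```python
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI  # assumes OPENAI_API_KEY is set

@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

llm = ChatOpenAI(model="gpt-4o-mini").bind_tools([add])
messages = [HumanMessage("What is 2 + 3?")]
ai_msg = llm.invoke(messages)
messages.append(ai_msg)

# Execute each requested tool call and pass the result back to the model
for call in ai_msg.tool_calls:
    result = add.invoke(call["args"])
    messages.append(ToolMessage(content=str(result), tool_call_id=call["id"]))

final = llm.invoke(messages)
```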
```diff
@@ -58,8 +58,8 @@
 "import getpass\n",
 "import os\n",
 "\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -66,8 +66,8 @@
 "import getpass\n",
 "import os\n",
 "\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -58,8 +58,8 @@
 "import os\n",
 "\n",
 "# If you'd like to use LangSmith, uncomment the below:\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -78,8 +78,8 @@
 "source": [
 "import getpass\n",
 "import os\n",
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -79,8 +79,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
 ]
 },
 {
```
```diff
@@ -58,8 +58,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
 ]
 },
 {
```
```diff
@@ -66,8 +66,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
 ]
 },
 {
```
```diff
@@ -92,8 +92,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
 ]
 },
 {
```
```diff
@@ -93,8 +93,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
 ]
 },
 {
```
```diff
@@ -104,8 +104,8 @@
 "import getpass\n",
 "import os\n",
 "\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your Langsmith API key: \")"
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your Langsmith API key: \")"
 ]
 },
 {
```
```diff
@@ -82,8 +82,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
 ]
 },
 {
```
```diff
@@ -85,8 +85,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
 ]
 },
 {
```
```diff
@@ -57,8 +57,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
+"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
 ]
 },
 {
```
```diff
@@ -44,8 +44,8 @@
 "import uuid\n",
 "\n",
 "uid = uuid.uuid4().hex[:6]\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = \"YOUR API KEY\""
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = \"YOUR API KEY\""
 ]
 },
 {
```
```diff
@@ -45,9 +45,9 @@
 "\n",
 "uid = uuid.uuid4().hex[:6]\n",
 "project_name = f\"Run Fine-tuning Walkthrough {uid}\"\n",
-"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
-"os.environ[\"LANGCHAIN_API_KEY\"] = \"YOUR API KEY\"\n",
-"os.environ[\"LANGCHAIN_PROJECT\"] = project_name"
+"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
+"os.environ[\"LANGSMITH_API_KEY\"] = \"YOUR API KEY\"\n",
+"os.environ[\"LANGSMITH_PROJECT\"] = project_name"
 ]
 },
 {
```
**docs/docs/integrations/document_loaders/hyperbrowser.ipynb** (new file, 221 lines)
```diff
@@ -0,0 +1,221 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# HyperbrowserLoader"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[Hyperbrowser](https://hyperbrowser.ai) is a platform for running and scaling headless browsers. It lets you launch and manage browser sessions at scale and provides easy to use solutions for any webscraping needs, such as scraping a single page or crawling an entire site.\n",
+    "\n",
+    "Key Features:\n",
+    "- Instant Scalability - Spin up hundreds of browser sessions in seconds without infrastructure headaches\n",
+    "- Simple Integration - Works seamlessly with popular tools like Puppeteer and Playwright\n",
+    "- Powerful APIs - Easy to use APIs for scraping/crawling any site, and much more\n",
+    "- Bypass Anti-Bot Measures - Built-in stealth mode, ad blocking, automatic CAPTCHA solving, and rotating proxies\n",
+    "\n",
+    "This notebook provides a quick overview for getting started with Hyperbrowser [document loader](https://python.langchain.com/docs/concepts/#document-loaders).\n",
+    "\n",
+    "For more information about Hyperbrowser, please visit the [Hyperbrowser website](https://hyperbrowser.ai) or if you want to check out the docs, you can visit the [Hyperbrowser docs](https://docs.hyperbrowser.ai).\n",
+    "\n",
+    "## Overview\n",
+    "### Integration details\n",
+    "\n",
+    "| Class | Package | Local | Serializable | JS support|\n",
+    "| :--- | :--- | :---: | :---: | :---: |\n",
+    "| HyperbrowserLoader | langchain-hyperbrowser | ❌ | ❌ | ❌ |\n",
+    "### Loader features\n",
+    "| Source | Document Lazy Loading | Native Async Support |\n",
+    "| :---: | :---: | :---: |\n",
+    "| HyperbrowserLoader | ✅ | ✅ |\n",
+    "\n",
+    "## Setup\n",
+    "\n",
+    "To access Hyperbrowser document loader you'll need to install the `langchain-hyperbrowser` integration package, and create a Hyperbrowser account and get an API key.\n",
+    "\n",
+    "### Credentials\n",
+    "\n",
+    "Head to [Hyperbrowser](https://app.hyperbrowser.ai/) to sign up and generate an API key. Once you've done this set the HYPERBROWSER_API_KEY environment variable:\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Installation\n",
+    "\n",
+    "Install **langchain-hyperbrowser**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -qU langchain-hyperbrowser"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialization\n",
+    "\n",
+    "Now we can instantiate our model object and load documents:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_hyperbrowser import HyperbrowserLoader\n",
+    "\n",
+    "loader = HyperbrowserLoader(\n",
+    "    urls=\"https://example.com\",\n",
+    "    api_key=\"YOUR_API_KEY\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Document(metadata={'title': 'Example Domain', 'viewport': 'width=device-width, initial-scale=1', 'sourceURL': 'https://example.com'}, page_content='Example Domain\\n\\n# Example Domain\\n\\nThis domain is for use in illustrative examples in documents. You may use this\\ndomain in literature without prior coordination or asking for permission.\\n\\n[More information...](https://www.iana.org/domains/example)')"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "docs = loader.load()\n",
+    "docs[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(docs[0].metadata)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Lazy Load"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "page = []\n",
+    "for doc in loader.lazy_load():\n",
+    "    page.append(doc)\n",
+    "    if len(page) >= 10:\n",
+    "        # do some paged operation, e.g.\n",
+    "        # index.upsert(page)\n",
+    "\n",
+    "        page = []"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Advanced Usage\n",
+    "\n",
+    "You can specify the operation to be performed by the loader. The default operation is `scrape`. For `scrape`, you can provide a single URL or a list of URLs to be scraped. For `crawl`, you can only provide a single URL. The `crawl` operation will crawl the provided page and subpages and return a document for each page."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loader = HyperbrowserLoader(\n",
+    "    urls=\"https://hyperbrowser.ai\", api_key=\"YOUR_API_KEY\", operation=\"crawl\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional params for the loader can also be provided in the `params` argument. For more information on the supported params, visit https://docs.hyperbrowser.ai/reference/sdks/python/scrape#start-scrape-job-and-wait or https://docs.hyperbrowser.ai/reference/sdks/python/crawl#start-crawl-job-and-wait."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loader = HyperbrowserLoader(\n",
+    "    urls=\"https://example.com\",\n",
+    "    api_key=\"YOUR_API_KEY\",\n",
+    "    operation=\"scrape\",\n",
+    "    params={\"scrape_options\": {\"include_tags\": [\"h1\", \"h2\", \"p\"]}},\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## API reference\n",
+    "\n",
+    "- [GitHub](https://github.com/hyperbrowserai/langchain-hyperbrowser/)\n",
+    "- [PyPi](https://pypi.org/project/langchain-hyperbrowser/)\n",
+    "- [Hyperbrowser Docs](https://docs.hyperbrowser.ai/)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
```
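The feature table in the new notebook advertises lazy loading and native async support. A hedged sketch of the async path, relying on the generic `alazy_load` iterator that LangChain document loaders inherit from the base loader interface (not verified against `langchain-hyperbrowser` specifically):

```python
import asyncio

from langchain_hyperbrowser import HyperbrowserLoader

async def main() -> None:
    loader = HyperbrowserLoader(urls="https://example.com", api_key="YOUR_API_KEY")
    # Standard async iteration: documents arrive one at a time
    async for doc in loader.alazy_load():
        print(doc.metadata.get("sourceURL"), len(doc.page_content))

asyncio.run(main())
```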
@@ -1,321 +1,363 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e48afb8d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# YouTube audio\n",
|
||||
"\n",
|
||||
"Building chat or QA applications on YouTube videos is a topic of high interest.\n",
|
||||
"\n",
|
||||
"Below we show how to easily go from a `YouTube url` to `audio of the video` to `text` to `chat`!\n",
|
||||
"\n",
|
||||
"We wil use the `OpenAIWhisperParser`, which will use the OpenAI Whisper API to transcribe audio to text, \n",
|
||||
"and the `OpenAIWhisperParserLocal` for local support and running on private clouds or on premise.\n",
|
||||
"\n",
|
||||
"Note: You will need to have an `OPENAI_API_KEY` supplied."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5f34e934",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.blob_loaders.youtube_audio import (\n",
|
||||
" YoutubeAudioLoader,\n",
|
||||
")\n",
|
||||
"from langchain_community.document_loaders.generic import GenericLoader\n",
|
||||
"from langchain_community.document_loaders.parsers import (\n",
|
||||
" OpenAIWhisperParser,\n",
|
||||
" OpenAIWhisperParserLocal,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "85fc12bd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will use `yt_dlp` to download audio for YouTube urls.\n",
|
||||
"\n",
|
||||
"We will use `pydub` to split downloaded audio files (such that we adhere to Whisper API's 25MB file size limit)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fb5a6606",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet yt_dlp\n",
|
||||
"%pip install --upgrade --quiet pydub\n",
|
||||
"%pip install --upgrade --quiet librosa"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b0e119f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### YouTube url to text\n",
|
||||
"\n",
|
||||
"Use `YoutubeAudioLoader` to fetch / download the audio files.\n",
|
||||
"\n",
|
||||
"Then, ues `OpenAIWhisperParser()` to transcribe them to text.\n",
|
||||
"\n",
|
||||
"Let's take the first lecture of Andrej Karpathy's YouTube course as an example! "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8682f256",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set a flag to switch between local and remote parsing\n",
|
||||
"# change this to True if you want to use local parsing\n",
|
||||
"local = False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "23e1e134",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cells": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[youtube] Extracting URL: https://youtu.be/kCc8FmEb1nY\n",
|
||||
"[youtube] kCc8FmEb1nY: Downloading webpage\n",
|
||||
"[youtube] kCc8FmEb1nY: Downloading android player API JSON\n",
|
||||
"[info] kCc8FmEb1nY: Downloading 1 format(s): 140\n",
|
||||
"[dashsegments] Total fragments: 11\n",
|
||||
"[download] Destination: /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/Let's build GPT: from scratch, in code, spelled out..m4a\n",
|
||||
"[download] 100% of 107.73MiB in 00:00:18 at 5.92MiB/s \n",
|
||||
"[FixupM4a] Correcting container of \"/Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/Let's build GPT: from scratch, in code, spelled out..m4a\"\n",
|
||||
"[ExtractAudio] Not converting audio /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/Let's build GPT: from scratch, in code, spelled out..m4a; file is already in target format m4a\n",
|
||||
"[youtube] Extracting URL: https://youtu.be/VMj-3S1tku0\n",
|
||||
"[youtube] VMj-3S1tku0: Downloading webpage\n",
|
||||
"[youtube] VMj-3S1tku0: Downloading android player API JSON\n",
|
||||
"[info] VMj-3S1tku0: Downloading 1 format(s): 140\n",
|
||||
"[download] /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/The spelled-out intro to neural networks and backpropagation: building micrograd.m4a has already been downloaded\n",
|
||||
"[download] 100% of 134.98MiB\n",
|
||||
"[ExtractAudio] Not converting audio /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/The spelled-out intro to neural networks and backpropagation: building micrograd.m4a; file is already in target format m4a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Two Karpathy lecture videos\n",
|
||||
"urls = [\"https://youtu.be/kCc8FmEb1nY\", \"https://youtu.be/VMj-3S1tku0\"]\n",
|
||||
"\n",
|
||||
"# Directory to save audio files\n",
|
||||
"save_dir = \"~/Downloads/YouTube\"\n",
|
||||
"\n",
|
||||
"# Transcribe the videos to text\n",
|
||||
"if local:\n",
|
||||
" loader = GenericLoader(\n",
|
||||
" YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParserLocal()\n",
|
||||
" )\n",
|
||||
"else:\n",
|
||||
" loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser())\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "72a94fd8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Hello, my name is Andrej and I've been training deep neural networks for a bit more than a decade. And in this lecture I'd like to show you what neural network training looks like under the hood. So in particular we are going to start with a blank Jupyter notebook and by the end of this lecture we will define and train a neural net and you'll get to see everything that goes on under the hood and exactly sort of how that works on an intuitive level. Now specifically what I would like to do is I w\""
|
||||
"cell_type": "markdown",
|
||||
"id": "e48afb8d",
|
||||
"metadata": {
|
||||
"id": "e48afb8d"
|
||||
},
|
||||
"source": [
|
||||
"# YouTube audio\n",
|
||||
"\n",
|
||||
"Building chat or QA applications on YouTube videos is a topic of high interest.\n",
|
||||
"\n",
|
||||
"Below we show how to easily go from a `YouTube url` to `audio of the video` to `text` to `chat`!\n",
|
||||
"\n",
|
||||
"We wil use the `OpenAIWhisperParser`, which will use the OpenAI Whisper API to transcribe audio to text,\n",
|
||||
"and the `OpenAIWhisperParserLocal` for local support and running on private clouds or on premise.\n",
|
||||
"\n",
|
||||
"Note: You will need to have an `OPENAI_API_KEY` supplied."
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Returns a list of Documents, which can be easily viewed or parsed\n",
|
||||
"docs[0].page_content[0:500]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "93be6b49",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Building a chat app from YouTube video\n",
|
||||
"\n",
|
||||
"Given `Documents`, we can easily enable chat / question+answering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "1823f042",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain_community.vectorstores import FAISS\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7257cda1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Combine doc\n",
|
||||
"combined_docs = [doc.page_content for doc in docs]\n",
|
||||
"text = \" \".join(combined_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "147c0c55",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split them\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)\n",
|
||||
"splits = text_splitter.split_text(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "f3556703",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build an index\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"vectordb = FAISS.from_texts(splits, embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "beaa99db",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build a QA chain\n",
|
||||
"qa_chain = RetrievalQA.from_chain_type(\n",
|
||||
" llm=ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n",
|
||||
" chain_type=\"stuff\",\n",
|
||||
" retriever=vectordb.as_retriever(),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "f2239a62",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"We need to zero out the gradient before backprop at each step because the backward pass accumulates gradients in the grad attribute of each parameter. If we don't reset the grad to zero before each backward pass, the gradients will accumulate and add up, leading to incorrect updates and slower convergence. By resetting the grad to zero before each backward pass, we ensure that the gradients are calculated correctly and that the optimization process works as intended.\""
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5f34e934",
|
||||
"metadata": {
|
||||
"id": "5f34e934"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.blob_loaders.youtube_audio import (\n",
|
||||
" YoutubeAudioLoader,\n",
|
||||
")\n",
|
||||
"from langchain_community.document_loaders.generic import GenericLoader\n",
|
||||
"from langchain_community.document_loaders.parsers.audio import (\n",
|
||||
" OpenAIWhisperParser,\n",
|
||||
" OpenAIWhisperParserLocal,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Ask a question!\n",
|
||||
"query = \"Why do we need to zero out the gradient before backprop at each step?\"\n",
|
||||
"qa_chain.run(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "a8d01098",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'In the context of transformers, an encoder is a component that reads in a sequence of input tokens and generates a sequence of hidden representations. On the other hand, a decoder is a component that takes in a sequence of hidden representations and generates a sequence of output tokens. The main difference between the two is that the encoder is used to encode the input sequence into a fixed-length representation, while the decoder is used to decode the fixed-length representation into an output sequence. In machine translation, for example, the encoder reads in the source language sentence and generates a fixed-length representation, which is then used by the decoder to generate the target language sentence.'"
|
||||
"cell_type": "markdown",
|
||||
"id": "85fc12bd",
|
||||
"metadata": {
|
||||
"id": "85fc12bd"
|
||||
},
|
||||
"source": [
|
||||
"We will use `yt_dlp` to download audio for YouTube urls.\n",
|
||||
"\n",
|
||||
"We will use `pydub` to split downloaded audio files (such that we adhere to Whisper API's 25MB file size limit)."
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What is the difference between an encoder and decoder?\"\n",
|
||||
"qa_chain.run(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "fe1e77dd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'For any token, x is the input vector that contains the private information of that token, k and q are the key and query vectors respectively, which are produced by forwarding linear modules on x, and v is the vector that is calculated by propagating the same linear module on x again. The key vector represents what the token contains, and the query vector represents what the token is looking for. The vector v is the information that the token will communicate to other tokens if it finds them interesting, and it gets aggregated for the purposes of the self-attention mechanism.'"
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fb5a6606",
|
||||
"metadata": {
|
||||
"id": "fb5a6606"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet yt_dlp\n",
|
||||
"%pip install --upgrade --quiet pydub\n",
|
||||
"%pip install --upgrade --quiet librosa"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b0e119f4",
|
||||
"metadata": {
|
||||
"id": "b0e119f4"
|
||||
},
|
||||
"source": [
|
||||
"### YouTube url to text\n",
|
||||
"\n",
|
||||
"Use `YoutubeAudioLoader` to fetch / download the audio files.\n",
|
||||
"\n",
|
||||
"Then, ues `OpenAIWhisperParser()` to transcribe them to text.\n",
|
||||
"\n",
|
||||
"Let's take the first lecture of Andrej Karpathy's YouTube course as an example!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8682f256",
|
||||
"metadata": {
|
||||
"id": "8682f256"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set a flag to switch between local and remote parsing\n",
|
||||
"# change this to True if you want to use local parsing\n",
|
||||
"local = False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23e1e134",
|
||||
"metadata": {
|
||||
"id": "23e1e134",
|
||||
"outputId": "0794ffeb-f912-48cc-e3cb-3b4d6e5221c7"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[youtube] Extracting URL: https://youtu.be/kCc8FmEb1nY\n",
|
||||
"[youtube] kCc8FmEb1nY: Downloading webpage\n",
|
||||
"[youtube] kCc8FmEb1nY: Downloading android player API JSON\n",
|
||||
"[info] kCc8FmEb1nY: Downloading 1 format(s): 140\n",
|
||||
"[dashsegments] Total fragments: 11\n",
|
||||
"[download] Destination: /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/Let's build GPT: from scratch, in code, spelled out..m4a\n",
|
||||
"[download] 100% of 107.73MiB in 00:00:18 at 5.92MiB/s \n",
|
||||
"[FixupM4a] Correcting container of \"/Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/Let's build GPT: from scratch, in code, spelled out..m4a\"\n",
|
||||
"[ExtractAudio] Not converting audio /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/Let's build GPT: from scratch, in code, spelled out..m4a; file is already in target format m4a\n",
|
||||
"[youtube] Extracting URL: https://youtu.be/VMj-3S1tku0\n",
|
||||
"[youtube] VMj-3S1tku0: Downloading webpage\n",
|
||||
"[youtube] VMj-3S1tku0: Downloading android player API JSON\n",
|
||||
"[info] VMj-3S1tku0: Downloading 1 format(s): 140\n",
|
||||
"[download] /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/The spelled-out intro to neural networks and backpropagation: building micrograd.m4a has already been downloaded\n",
|
||||
"[download] 100% of 134.98MiB\n",
|
||||
"[ExtractAudio] Not converting audio /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/The spelled-out intro to neural networks and backpropagation: building micrograd.m4a; file is already in target format m4a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Two Karpathy lecture videos\n",
|
||||
"urls = [\"https://youtu.be/kCc8FmEb1nY\", \"https://youtu.be/VMj-3S1tku0\"]\n",
|
||||
"\n",
|
||||
"# Directory to save audio files\n",
|
||||
"save_dir = \"~/Downloads/YouTube\"\n",
|
||||
"\n",
|
||||
"# Transcribe the videos to text\n",
|
||||
"if local:\n",
|
||||
" loader = GenericLoader(\n",
|
||||
" YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParserLocal()\n",
|
||||
" )\n",
|
||||
"else:\n",
|
||||
" loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser())\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "72a94fd8",
|
||||
"metadata": {
|
||||
"id": "72a94fd8",
|
||||
"outputId": "b024759c-3925-40c1-9c59-2f9dabee0248"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Hello, my name is Andrej and I've been training deep neural networks for a bit more than a decade. And in this lecture I'd like to show you what neural network training looks like under the hood. So in particular we are going to start with a blank Jupyter notebook and by the end of this lecture we will define and train a neural net and you'll get to see everything that goes on under the hood and exactly sort of how that works on an intuitive level. Now specifically what I would like to do is I w\""
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Returns a list of Documents, which can be easily viewed or parsed\n",
|
||||
"docs[0].page_content[0:500]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "93be6b49",
|
||||
"metadata": {
|
||||
"id": "93be6b49"
|
||||
},
|
||||
"source": [
|
||||
"### Building a chat app from YouTube video\n",
|
||||
"\n",
|
||||
"Given `Documents`, we can easily enable chat / question+answering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1823f042",
|
||||
"metadata": {
|
||||
"id": "1823f042"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain_community.vectorstores import FAISS\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7257cda1",
|
||||
"metadata": {
|
||||
"id": "7257cda1"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Combine doc\n",
|
||||
"combined_docs = [doc.page_content for doc in docs]\n",
|
||||
"text = \" \".join(combined_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "147c0c55",
|
||||
"metadata": {
|
||||
"id": "147c0c55"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split them\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)\n",
|
||||
"splits = text_splitter.split_text(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f3556703",
|
||||
"metadata": {
|
||||
"id": "f3556703"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build an index\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"vectordb = FAISS.from_texts(splits, embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "beaa99db",
|
||||
"metadata": {
|
||||
"id": "beaa99db"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build a QA chain\n",
|
||||
"qa_chain = RetrievalQA.from_chain_type(\n",
|
||||
" llm=ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n",
|
||||
" chain_type=\"stuff\",\n",
|
||||
" retriever=vectordb.as_retriever(),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f2239a62",
|
||||
"metadata": {
|
||||
"id": "f2239a62",
|
||||
"outputId": "b8de052d-cb76-44c5-bb0c-57e7398e89e6"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"We need to zero out the gradient before backprop at each step because the backward pass accumulates gradients in the grad attribute of each parameter. If we don't reset the grad to zero before each backward pass, the gradients will accumulate and add up, leading to incorrect updates and slower convergence. By resetting the grad to zero before each backward pass, we ensure that the gradients are calculated correctly and that the optimization process works as intended.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Ask a question!\n",
|
||||
"query = \"Why do we need to zero out the gradient before backprop at each step?\"\n",
|
||||
"qa_chain.run(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a8d01098",
|
||||
"metadata": {
|
||||
"id": "a8d01098",
|
||||
"outputId": "9d66d66e-fc7f-4ac9-b104-a8e45e962949"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'In the context of transformers, an encoder is a component that reads in a sequence of input tokens and generates a sequence of hidden representations. On the other hand, a decoder is a component that takes in a sequence of hidden representations and generates a sequence of output tokens. The main difference between the two is that the encoder is used to encode the input sequence into a fixed-length representation, while the decoder is used to decode the fixed-length representation into an output sequence. In machine translation, for example, the encoder reads in the source language sentence and generates a fixed-length representation, which is then used by the decoder to generate the target language sentence.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What is the difference between an encoder and decoder?\"\n",
|
||||
"qa_chain.run(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fe1e77dd",
|
||||
"metadata": {
|
||||
"id": "fe1e77dd",
|
||||
"outputId": "19479403-92c9-471e-8c93-5df4b17f007a"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'For any token, x is the input vector that contains the private information of that token, k and q are the key and query vectors respectively, which are produced by forwarding linear modules on x, and v is the vector that is calculated by propagating the same linear module on x again. The key vector represents what the token contains, and the query vector represents what the token is looking for. The vector v is the information that the token will communicate to other tokens if it finds them interesting, and it gets aggregated for the purposes of the self-attention mechanism.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"For any token, what are x, k, v, and q?\"\n",
|
||||
"qa_chain.run(query)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "97cc609b13305c559618ec78a438abc56230b9381f827f22d070313b9a1f3777"
|
||||
}
|
||||
},
|
||||
"colab": {
|
||||
"provenance": []
|
||||
}
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "97cc609b13305c559618ec78a438abc56230b9381f827f22d070313b9a1f3777"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -15,13 +15,14 @@
|
||||
">[openCypher](https://opencypher.org/) is an open-source implementation of Cypher.# Neptune Open Cypher QA Chain\n",
|
||||
"This QA chain queries Amazon Neptune using openCypher and returns human readable response\n",
|
||||
"\n",
|
||||
"LangChain supports both [Neptune Database](https://docs.aws.amazon.com/neptune/latest/userguide/intro.html) and [Neptune Analytics](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html) with `NeptuneOpenCypherQAChain` \n",
|
||||
"\n",
|
||||
"LangChain supports both [Neptune Database](https://docs.aws.amazon.com/neptune/latest/userguide/intro.html) and [Neptune Analytics](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html) with `create_neptune_opencypher_qa_chain`.\n",
|
||||
"\n",
|
||||
"Neptune Database is a serverless graph database designed for optimal scalability and availability. It provides a solution for graph database workloads that need to scale to 100,000 queries per second, Multi-AZ high availability, and multi-Region deployments. You can use Neptune Database for social networking, fraud alerting, and Customer 360 applications.\n",
|
||||
"\n",
|
||||
"Neptune Analytics is an analytics database engine that can quickly analyze large amounts of graph data in memory to get insights and find trends. Neptune Analytics is a solution for quickly analyzing existing graph databases or graph datasets stored in a data lake. It uses popular graph analytic algorithms and low-latency analytic queries.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Using Neptune Database"
|
||||
]
|
||||
},
|
||||
@@ -31,7 +32,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.graphs import NeptuneGraph\n",
|
||||
"from langchain_aws.graphs import NeptuneGraph\n",
|
||||
"\n",
|
||||
"host = \"<neptune-host>\"\n",
|
||||
"port = 8182\n",
|
||||
@@ -53,7 +54,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.graphs import NeptuneAnalyticsGraph\n",
|
||||
"from langchain_aws.graphs import NeptuneAnalyticsGraph\n",
|
||||
"\n",
|
||||
"graph = NeptuneAnalyticsGraph(graph_identifier=\"<neptune-analytics-graph-id>\")"
|
||||
]
|
||||
@@ -62,42 +63,197 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using NeptuneOpenCypherQAChain\n",
|
||||
"## Using the Neptune openCypher QA Chain\n",
|
||||
"\n",
|
||||
"This QA chain queries Neptune graph database using openCypher and returns human readable response."
|
||||
"This QA chain queries the Neptune graph database using openCypher and returns a human-readable response."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Austin airport has 98 outgoing routes.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_aws import ChatBedrockConverse\n",
|
||||
"from langchain_aws.chains import create_neptune_opencypher_qa_chain\n",
|
||||
"\n",
|
||||
"MODEL_ID = \"anthropic.claude-3-5-sonnet-20241022-v2:0\"\n",
|
||||
"llm = ChatBedrockConverse(\n",
|
||||
" model=MODEL_ID,\n",
|
||||
" temperature=0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = create_neptune_opencypher_qa_chain(llm=llm, graph=graph)\n",
|
||||
"\n",
|
||||
"result = chain.invoke(\"How many outgoing routes does the Austin airport have?\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Adding Message History\n",
|
||||
"\n",
|
||||
"The Neptune openCypher QA chain has the ability to be wrapped by [`RunnableWithMessageHistory`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory). This adds message history to the chain, allowing us to create a chatbot that retains conversation state across multiple invocations.\n",
|
||||
"\n",
|
||||
"To start, we need a way to store and load the message history. For this purpose, each thread will be created as an instance of [`InMemoryChatMessageHistory`](https://python.langchain.com/api_reference/core/chat_history/langchain_core.chat_history.InMemoryChatMessageHistory.html), and stored into a dictionary for repeated access.\n",
|
||||
"\n",
|
||||
"(Also see: https://python.langchain.com/docs/versions/migrating_memory/chat_history/#chatmessagehistory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.chat_history import InMemoryChatMessageHistory\n",
|
||||
"\n",
|
||||
"chats_by_session_id = {}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_chat_history(session_id: str) -> InMemoryChatMessageHistory:\n",
|
||||
" chat_history = chats_by_session_id.get(session_id)\n",
|
||||
" if chat_history is None:\n",
|
||||
" chat_history = InMemoryChatMessageHistory()\n",
|
||||
" chats_by_session_id[session_id] = chat_history\n",
|
||||
" return chat_history"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, the QA chain and message history storage can be used to create the new `RunnableWithMessageHistory`. Note that we must set `query` as the input key to match the format expected by the base chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"\n",
|
||||
"runnable_with_history = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" get_chat_history,\n",
|
||||
" input_messages_key=\"query\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Before invoking the chain, a unique `session_id` needs to be generated for the conversation that the new `InMemoryChatMessageHistory` will remember."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"session_id = uuid.uuid4()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, invoke the message history enabled chain with the `session_id`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The Austin airport has 98 outgoing routes.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"You can fly directly to 98 destinations from Austin airport.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import NeptuneOpenCypherQAChain\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
|
||||
"\n",
|
||||
"chain = NeptuneOpenCypherQAChain.from_llm(llm=llm, graph=graph)\n",
|
||||
"\n",
|
||||
"chain.invoke(\"how many outgoing routes does the Austin airport have?\")"
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"How many destinations can I fly to directly from Austin airport?\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As the chain continues to be invoked with the same `session_id`, responses will be returned in the context of previous queries in the conversation.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"You can fly directly to 4 destinations in Europe from Austin airport.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"Out of those destinations, how many are in Europe?\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The four European destinations you can fly to directly from Austin airport are:\n",
|
||||
"- AMS (Amsterdam Airport Schiphol)\n",
|
||||
"- FRA (Frankfurt am Main)\n",
|
||||
"- LGW (London Gatwick)\n",
|
||||
"- LHR (London Heathrow)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"Give me the codes and names of those airports.\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -111,7 +267,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"This example uses a `NeptuneRdfGraph` class that connects with the Neptune database and loads its schema. \n",
|
||||
"The `NeptuneSparqlQAChain` is used to connect the graph and LLM to ask natural language questions.\n",
|
||||
"The `create_neptune_sparql_qa_chain` is used to connect the graph and LLM to ask natural language questions.\n",
|
||||
"\n",
|
||||
"This notebook demonstrates an example using organizational data.\n",
|
||||
"\n",
|
||||
@@ -48,7 +48,7 @@
|
||||
"\n",
|
||||
"Seed the W3C organizational data, W3C org ontology plus some instances. \n",
|
||||
" \n",
|
||||
"You will need an S3 bucket in the same region and account. Set `STAGE_BUCKET`as the name of that bucket."
|
||||
"You will need an S3 bucket in the same region and account as the Neptune cluster. Set `STAGE_BUCKET`as the name of that bucket."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -84,7 +84,50 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Bulk-load the org ttl - both ontology and instances"
|
||||
"We will use the `%load` magic command from the `graph-notebook` package to insert the W3C data into the Neptune graph. Before running `%load`, use `%%graph_notebook_config` to set the graph connection parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install --upgrade --quiet graph-notebook"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext graph_notebook.magics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%graph_notebook_config\n",
|
||||
"{\n",
|
||||
" \"host\": \"<neptune-endpoint>\",\n",
|
||||
" \"neptune_service\": \"neptune-db\",\n",
|
||||
" \"port\": 8182,\n",
|
||||
" \"auth_mode\": \"<[DEFAULT|IAM]>\",\n",
|
||||
" \"load_from_s3_arn\": \"<neptune-cluster-load-role-arn>\",\n",
|
||||
" \"ssl\": true,\n",
|
||||
" \"aws_region\": \"<region>\"\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Bulk-load the org ttl - both ontology and instances."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -118,7 +161,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install --upgrade --quiet langchain langchain-community langchain-aws"
|
||||
"!pip install --upgrade --quiet langchain-aws"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -238,39 +281,186 @@
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create the Neptune Database RDF Graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import boto3\n",
|
||||
"from langchain_aws import ChatBedrock\n",
|
||||
"from langchain_community.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n",
|
||||
"from langchain_community.graphs import NeptuneRdfGraph\n",
|
||||
"from langchain_aws.graphs import NeptuneRdfGraph\n",
|
||||
"\n",
|
||||
"host = \"<your host>\"\n",
|
||||
"port = 8182 # change if different\n",
|
||||
"region = \"us-east-1\" # change if different\n",
|
||||
"graph = NeptuneRdfGraph(host=host, port=port, use_iam_auth=True, region_name=region)\n",
|
||||
"\n",
|
||||
"# Optionally change the schema\n",
|
||||
"# Optionally, change the schema\n",
|
||||
"# elems = graph.get_schema_elements\n",
|
||||
"# change elems ...\n",
|
||||
"# graph.load_schema(elems)\n",
|
||||
"# graph.load_schema(elems)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using the Neptune SPARQL QA Chain\n",
|
||||
"\n",
|
||||
"MODEL_ID = \"anthropic.claude-v2\"\n",
|
||||
"bedrock_client = boto3.client(\"bedrock-runtime\")\n",
|
||||
"llm = ChatBedrock(model_id=MODEL_ID, client=bedrock_client)\n",
|
||||
"This QA chain queries the Neptune graph database using SPARQL and returns a human-readable response."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_aws import ChatBedrockConverse\n",
|
||||
"from langchain_aws.chains import create_neptune_sparql_qa_chain\n",
|
||||
"\n",
|
||||
"chain = NeptuneSparqlQAChain.from_llm(\n",
|
||||
"MODEL_ID = \"anthropic.claude-3-5-sonnet-20241022-v2:0\"\n",
|
||||
"llm = ChatBedrockConverse(\n",
|
||||
" model_id=MODEL_ID,\n",
|
||||
" temperature=0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = create_neptune_sparql_qa_chain(\n",
|
||||
" llm=llm,\n",
|
||||
" graph=graph,\n",
|
||||
" examples=EXAMPLES,\n",
|
||||
" verbose=True,\n",
|
||||
" top_K=10,\n",
|
||||
" return_intermediate_steps=True,\n",
|
||||
" return_direct=False,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"result = chain.invoke(\"How many organizations are in the graph?\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here are a few more prompts to try on the graph data that was ingested.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = chain.invoke(\"Are there any mergers or acquisitions?\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = chain.invoke(\"Find organizations.\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = chain.invoke(\"Find sites of MegaSystems or MegaFinancial.\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = chain.invoke(\"Find a member who is a manager of one or more members.\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = chain.invoke(\"Find five members and their managers.\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = chain.invoke(\n",
|
||||
" \"Find org units or suborganizations of The Mega Group. What are the sites of those units?\"\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Adding Message History\n",
|
||||
"\n",
|
||||
"The Neptune SPARQL QA chain has the ability to be wrapped by [`RunnableWithMessageHistory`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory). This adds message history to the chain, allowing us to create a chatbot that retains conversation state across multiple invocations.\n",
|
||||
"\n",
|
||||
"To start, we need a way to store and load the message history. For this purpose, each thread will be created as an instance of [`InMemoryChatMessageHistory`](https://python.langchain.com/api_reference/core/chat_history/langchain_core.chat_history.InMemoryChatMessageHistory.html), and stored into a dictionary for repeated access.\n",
|
||||
"\n",
|
||||
"(Also see: https://python.langchain.com/docs/versions/migrating_memory/chat_history/#chatmessagehistory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.chat_history import InMemoryChatMessageHistory\n",
|
||||
"\n",
|
||||
"chats_by_session_id = {}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_chat_history(session_id: str) -> InMemoryChatMessageHistory:\n",
|
||||
" chat_history = chats_by_session_id.get(session_id)\n",
|
||||
" if chat_history is None:\n",
|
||||
" chat_history = InMemoryChatMessageHistory()\n",
|
||||
" chats_by_session_id[session_id] = chat_history\n",
|
||||
" return chat_history"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, the QA chain and message history storage can be used to create the new `RunnableWithMessageHistory`. Note that we must set `query` as the input key to match the format expected by the base chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"\n",
|
||||
"runnable_with_history = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" get_chat_history,\n",
|
||||
" input_messages_key=\"query\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -278,8 +468,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Ask questions\n",
|
||||
"Depends on the data we ingested above"
|
||||
"Before invoking the chain, a unique `session_id` needs to be generated for the conversation that the new `InMemoryChatMessageHistory` will remember.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -288,7 +477,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\"\"\"How many organizations are in the graph\"\"\")"
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"session_id = uuid.uuid4()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, invoke the message history enabled chain with the `session_id`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -297,7 +495,18 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\"\"\"Are there any mergers or acquisitions\"\"\")"
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"How many org units or suborganizations does the The Mega Group have?\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As the chain continues to be invoked with the same `session_id`, responses will be returned in the context of previous queries in the conversation.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -306,51 +515,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\"\"\"Find organizations\"\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\"\"\"Find sites of MegaSystems or MegaFinancial\"\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\"\"\"Find a member who is manager of one or more members.\"\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\"\"\"Find five members and who their manager is.\"\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\n",
|
||||
" \"\"\"Find org units or suborganizations of The Mega Group. What are the sites of those units?\"\"\"\n",
|
||||
")"
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"List the sites for each of the units.\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -364,7 +539,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -90,8 +90,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -41,8 +41,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -43,8 +43,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -45,8 +45,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -21,8 +21,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -310,14 +310,25 @@ from langchain_community.chat_message_histories import DynamoDBChatMessageHistor
|
||||
|
||||
## Graphs
|
||||
|
||||
### Amazon Neptune
|
||||
|
||||
>[Amazon Neptune](https://aws.amazon.com/neptune/)
|
||||
> is a high-performance graph analytics and serverless database for superior scalability and availability.
|
||||
|
||||
For the Cypher and SPARQL integrations below, we need to install the `langchain-aws` library.
|
||||
|
||||
```bash
|
||||
pip install langchain-aws
|
||||
```
|
||||
|
||||
### Amazon Neptune with Cypher
|
||||
|
||||
See a [usage example](/docs/integrations/graphs/amazon_neptune_open_cypher).
|
||||
|
||||
```python
|
||||
from langchain_community.graphs import NeptuneGraph
|
||||
from langchain_community.graphs import NeptuneAnalyticsGraph
|
||||
from langchain_community.chains.graph_qa.neptune_cypher import NeptuneOpenCypherQAChain
|
||||
from langchain_aws.graphs import NeptuneGraph
|
||||
from langchain_aws.graphs import NeptuneAnalyticsGraph
|
||||
from langchain_aws.chains import create_neptune_opencypher_qa_chain
|
||||
```
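
A minimal end-to-end sketch assembled from the linked usage example (the Neptune host is a placeholder you supply; the Bedrock model ID is the one the notebook uses):

```python
from langchain_aws import ChatBedrockConverse
from langchain_aws.chains import create_neptune_opencypher_qa_chain
from langchain_aws.graphs import NeptuneGraph

# Connect to a Neptune Database cluster (placeholder endpoint)
graph = NeptuneGraph(host="<neptune-host>", port=8182)

# Any chat model works here; the notebook uses Claude 3.5 Sonnet via Bedrock
llm = ChatBedrockConverse(
    model="anthropic.claude-3-5-sonnet-20241022-v2:0",
    temperature=0,
)

chain = create_neptune_opencypher_qa_chain(llm=llm, graph=graph)
result = chain.invoke("How many outgoing routes does the Austin airport have?")
print(result["result"].content)
```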
|
||||
|
||||
### Amazon Neptune with SPARQL
|
||||
@@ -325,8 +336,8 @@ from langchain_community.chains.graph_qa.neptune_cypher import NeptuneOpenCypher
|
||||
See a [usage example](/docs/integrations/graphs/amazon_neptune_sparql).
|
||||
|
||||
```python
|
||||
from langchain_community.graphs import NeptuneRdfGraph
|
||||
from langchain_community.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain
|
||||
from langchain_aws.graphs import NeptuneRdfGraph
|
||||
from langchain_aws.chains import create_neptune_sparql_qa_chain
|
||||
```
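
A minimal sketch mirroring the linked notebook (host and region are placeholders; the notebook additionally passes a few-shot `examples` string it defines, omitted here):

```python
from langchain_aws import ChatBedrockConverse
from langchain_aws.chains import create_neptune_sparql_qa_chain
from langchain_aws.graphs import NeptuneRdfGraph

# Connect to the Neptune cluster and load its RDF schema (placeholder values)
graph = NeptuneRdfGraph(
    host="<your-host>", port=8182, use_iam_auth=True, region_name="us-east-1"
)

llm = ChatBedrockConverse(
    model="anthropic.claude-3-5-sonnet-20241022-v2:0",
    temperature=0,
)

chain = create_neptune_sparql_qa_chain(llm=llm, graph=graph)
result = chain.invoke("How many organizations are in the graph?")
print(result["result"].content)
```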
|
||||
|
||||
|
||||
|
||||
@@ -24,3 +24,11 @@ See a [usage example](/docs/integrations/graphs/falkordb).
|
||||
```python
|
||||
from langchain_community.chains.graph_qa.falkordb import FalkorDBQAChain
|
||||
```
|
||||
|
||||
## Memory
|
||||
|
||||
See a [usage example](/docs/integrations/memory/falkordb_chat_message_history).
|
||||
|
||||
```python
|
||||
from langchain_falkordb import FalkorDBChatMessageHistory
|
||||
```
|
||||
|
||||
21
docs/docs/integrations/providers/fmp-data.mdx
Normal file
@@ -0,0 +1,21 @@
|
||||
# FMP Data (Financial Modeling Prep)
|
||||
|
||||
> [FMP-Data](https://pypi.org/project/fmp-data/) is a Python package for connecting to
|
||||
> the Financial Modeling Prep API. It simplifies access to production-quality financial data.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
Get an `FMP Data` API key by
|
||||
visiting [this page](https://site.financialmodelingprep.com/pricing-plans?couponCode=mehdi),
|
||||
and set it as an environment variable (`FMP_API_KEY`).
|
||||
|
||||
Then, install [langchain-fmp-data](https://pypi.org/project/langchain-fmp-data/).
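
A minimal setup sketch (the key value is a placeholder):

```python
import os

# pip install -U langchain-fmp-data
os.environ["FMP_API_KEY"] = "<your-fmp-api-key>"
```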
|
||||
|
||||
## Tools
|
||||
|
||||
See an [example](https://github.com/MehdiZare/langchain-fmp-data/tree/main/docs).
|
||||
|
||||
```python
|
||||
from langchain_fmp_data import FMPDataTool, FMPDataToolkit
|
||||
```
|
||||
67
docs/docs/integrations/providers/hyperbrowser.mdx
Normal file
@@ -0,0 +1,67 @@
|
||||
# Hyperbrowser
|
||||
|
||||
> [Hyperbrowser](https://hyperbrowser.ai) is a platform for running and scaling headless browsers. It lets you launch and manage browser sessions at scale and provides easy-to-use solutions for any web scraping need, such as scraping a single page or crawling an entire site.
|
||||
>
|
||||
> Key Features:
|
||||
>
|
||||
> - Instant Scalability - Spin up hundreds of browser sessions in seconds without infrastructure headaches
|
||||
> - Simple Integration - Works seamlessly with popular tools like Puppeteer and Playwright
|
||||
> - Powerful APIs - Easy to use APIs for scraping/crawling any site, and much more
|
||||
> - Bypass Anti-Bot Measures - Built-in stealth mode, ad blocking, automatic CAPTCHA solving, and rotating proxies
|
||||
|
||||
For more information about Hyperbrowser, please visit the [Hyperbrowser website](https://hyperbrowser.ai) or if you want to check out the docs, you can visit the [Hyperbrowser docs](https://docs.hyperbrowser.ai).
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
To get started with `langchain-hyperbrowser`, you can install the package using pip:
|
||||
|
||||
```bash
|
||||
pip install langchain-hyperbrowser
|
||||
```
|
||||
|
||||
And you should configure credentials by setting the following environment variables:
|
||||
|
||||
`HYPERBROWSER_API_KEY=<your-api-key>`
|
||||
|
||||
Make sure to get your API Key from https://app.hyperbrowser.ai/
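
For example, a quick way to set this from Python (the key value is a placeholder):

```python
import os

os.environ["HYPERBROWSER_API_KEY"] = "<your-api-key>"
```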
|
||||
|
||||
## Document Loader
|
||||
|
||||
The `HyperbrowserLoader` class in `langchain-hyperbrowser` can easily be used to load content from any single page or multiple pages as well as crawl an entire site.
|
||||
The content can be loaded as markdown or HTML.
|
||||
|
||||
```python
|
||||
from langchain_hyperbrowser import HyperbrowserLoader
|
||||
|
||||
loader = HyperbrowserLoader(urls="https://example.com")
|
||||
docs = loader.load()
|
||||
|
||||
print(docs[0])
|
||||
```
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
You can specify the operation to be performed by the loader. The default operation is `scrape`. For `scrape`, you can provide a single URL or a list of URLs to be scraped. For `crawl`, you can only provide a single URL. The `crawl` operation will crawl the provided page and subpages and return a document for each page.
|
||||
|
||||
```python
|
||||
loader = HyperbrowserLoader(
|
||||
urls="https://hyperbrowser.ai", api_key="YOUR_API_KEY", operation="crawl"
|
||||
)
|
||||
```
|
||||
|
||||
Optional params for the loader can also be provided in the `params` argument. For more information on the supported params, visit https://docs.hyperbrowser.ai/reference/sdks/python/scrape#start-scrape-job-and-wait or https://docs.hyperbrowser.ai/reference/sdks/python/crawl#start-crawl-job-and-wait.
|
||||
|
||||
```python
|
||||
loader = HyperbrowserLoader(
|
||||
urls="https://example.com",
|
||||
api_key="YOUR_API_KEY",
|
||||
operation="scrape",
|
||||
params={"scrape_options": {"include_tags": ["h1", "h2", "p"]}}
|
||||
)
|
||||
```
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- [Hyperbrowser Docs](https://docs.hyperbrowser.ai/)
|
||||
- [GitHub](https://github.com/hyperbrowserai/langchain-hyperbrowser/)
|
||||
- [PyPi](https://pypi.org/project/langchain-hyperbrowser/)
|
||||
112
docs/docs/integrations/providers/lindorm.ipynb
Normal file
@@ -0,0 +1,112 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Lindorm\n",
|
||||
"\n",
|
||||
"Lindorm is a cloud-native multimodal database from Alibaba-Cloud, It supports unified access and integrated processing of various types of data, including wide tables, time-series, text, objects, streams, and spatial data. It is compatible with multiple standard interfaces such as SQL, HBase/Cassandra/S3, TSDB, HDFS, Solr, and Kafka, and seamlessly integrates with third-party ecosystem tools. This makes it suitable for scenarios such as logging, monitoring, billing, advertising, social networking, travel, and risk control. Lindorm is also one of the databases that support Alibaba's core businesses. \n",
|
||||
"\n",
|
||||
"To use the AI and vector capabilities of Lindorm, you should [get the service](https://help.aliyun.com/document_detail/174640.html?spm=a2c4g.11186623.help-menu-172543.d_0_1_0.4c6367558DN8Uq) and install `langchain-lindorm-integration` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "y8ku6X96sebl"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": "!pip install -U langchain-lindorm-integration"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Embeddings\n",
|
||||
"\n",
|
||||
"To use the embedding model deployed in Lindorm AI Service, import the LindormAIEmbeddings. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": "from langchain_lindorm_integration import LindormAIEmbeddings"
|
||||
},
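{
"metadata": {},
"cell_type": "markdown",
"source": [
"A minimal instantiation sketch; the endpoint, credentials, and model name below are placeholders you obtain from your own Lindorm console and model deployment."
]
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "embeddings = LindormAIEmbeddings(\n    endpoint=\"<AI_ENDPOINT>\",  # Lindorm AI endpoint (placeholder)\n    username=\"root\",\n    password=\"<PASSWORD>\",  # placeholder credential\n    model_name=\"bge_m3_model\",  # set to your deployed model\n)"
},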
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Rerank\n",
|
||||
"\n",
|
||||
"The Lindorm AI Service also supports reranking."
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": "from langchain_lindorm_integration.reranker import LindormAIRerank"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Vector Store\n",
|
||||
"\n",
|
||||
"Lindorm also supports vector store."
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": "from langchain_lindorm_integration import LindormVectorStore"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## ByteStore\n",
|
||||
"\n",
|
||||
"Use ByteStore from Lindorm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": "from langchain_lindorm_integration import LindormByteStore"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
@@ -46,8 +46,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -107,7 +107,7 @@
|
||||
"source": [
|
||||
"### Configure\n",
|
||||
"\n",
|
||||
"Here we define the conncection to Elasticsearch. In this example we use a locally running instance. Alternatively, you can make an account in [Elastic Cloud](https://cloud.elastic.co/) and start a [free trial](https://www.elastic.co/cloud/cloud-trial-overview)."
|
||||
"Here we define the connection to Elasticsearch. In this example we use a locally running instance. Alternatively, you can make an account in [Elastic Cloud](https://cloud.elastic.co/) and start a [free trial](https://www.elastic.co/cloud/cloud-trial-overview)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -194,7 +194,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"id": "029dc5e7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -291,7 +291,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"id": "a6705dda",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -299,7 +299,7 @@
|
||||
"from langchain_ibm import WatsonxRerank\n",
|
||||
"\n",
|
||||
"wx_rerank = WatsonxRerank(\n",
|
||||
" model_id=\"ibm/slate-125m-english-rtrvr\",\n",
|
||||
" model_id=\"cross-encoder/ms-marco-minilm-l-12-v2\",\n",
|
||||
" url=\"https://us-south.ml.cloud.ibm.com\",\n",
|
||||
" project_id=\"PASTE YOUR PROJECT_ID HERE\",\n",
|
||||
")"
|
||||
|
||||
@@ -74,9 +74,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
|
||||
"# os.environ[\"LANGCHAIN_PROJECT\"] = 'Experimentz'"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
|
||||
"# os.environ[\"LANGSMITH_PROJECT\"] = 'Experimentz'"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -66,8 +66,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -74,8 +74,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -65,8 +65,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -65,8 +65,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -114,8 +114,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
294
docs/docs/integrations/text_embedding/lindorm.ipynb
Normal file
@@ -0,0 +1,294 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Lindorm\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LindormAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Lindorm embedding models using LangChain. \n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Provider | Package |\n",
|
||||
"|:--------:|:---------------------------------:|\n",
|
||||
"| [Lindorm](/docs/integrations/providers/lindorm/) | [langchain-lindorm-integration](https://pypi.org/project/langchain-lindorm-integration/) |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"To access Lindorm embedding models you'll need to create a Lindorm account, get AK&SK, and install the `langchain-lindorm-integration` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"You can get you credentials in the [console](https://lindorm.console.aliyun.com/cn-hangzhou/clusterhou/cluster?spm=a2c4g.11186623.0.0.466534e93Xj6tt)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-08T03:09:11.911612Z",
|
||||
"start_time": "2025-01-08T03:09:11.907582Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Config:\n",
|
||||
" AI_LLM_ENDPOINT = os.environ.get(\"AI_ENDPOINT\", \"<AI_ENDPOINT>\")\n",
|
||||
" AI_USERNAME = os.environ.get(\"AI_USERNAME\", \"root\")\n",
|
||||
" AI_PWD = os.environ.get(\"AI_PASSWORD\", \"<PASSWORD>\")\n",
|
||||
"\n",
|
||||
" AI_DEFAULT_EMBEDDING_MODEL = \"bge_m3_model\" # set to your deployed model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Lindorm integration lives in the `langchain-lindorm-integration` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "64853226",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-08T03:09:15.249326Z",
|
||||
"start_time": "2025-01-08T03:09:13.476557Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-lindorm-integration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-08T03:09:16.950069Z",
|
||||
"start_time": "2025-01-08T03:09:16.385033Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_lindorm_integration import LindormAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = LindormAIEmbeddings(\n",
|
||||
" endpoint=Config.AI_LLM_ENDPOINT,\n",
|
||||
" username=Config.AI_USERNAME,\n",
|
||||
" password=Config.AI_PWD,\n",
|
||||
" model_name=Config.AI_DEFAULT_EMBEDDING_MODEL,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "d817716b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-08T03:09:18.822848Z",
|
||||
"start_time": "2025-01-08T03:09:18.085150Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-08T03:09:23.634046Z",
|
||||
"start_time": "2025-01-08T03:09:23.432791Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.016254117712378502, -0.01154549140483141, 0.0042558759450912476, -0.011416379362344742, -0.01770\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-08T03:09:25.960291Z",
|
||||
"start_time": "2025-01-08T03:09:25.743941Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.016254086047410965, -0.011545476503670216, 0.0042558712884783745, -0.011416426859796047, -0.0177\n",
|
||||
"[-0.07268096506595612, -3.236892371205613e-05, -0.0019329536007717252, -0.030644644051790237, -0.018\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `LindormEmbeddings` features and configuration options, please refer to the [API reference](https://pypi.org/project/langchain-lindorm-integration/).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -64,8 +64,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -64,8 +64,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -66,8 +66,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -66,8 +66,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -64,8 +64,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -65,8 +65,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
412
docs/docs/integrations/tools/fmp-data.ipynb
Normal file
@@ -0,0 +1,412 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "62828c19159a0da8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# FMP Data\n",
|
||||
"\n",
|
||||
"Access financial market data through natural language queries.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"The FMP (Financial Modeling Prep) LangChain integration provides a seamless way to access financial market data through natural language queries. This integration offers two main components:\n",
|
||||
"\n",
|
||||
"- `FMPDataToolkit`: Creates collections of tools based on natural language queries\n",
|
||||
"- `FMPDataTool`: A single unified tool that automatically selects and uses the appropriate endpoints\n",
|
||||
"\n",
|
||||
"The integration leverages LangChain's semantic search capabilities to match user queries with the most relevant FMP API endpoints, making financial data access more intuitive and efficient.\n",
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3faf15d8ae5f8500",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install -U langchain-fmp-data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "647f66796446eb0f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Replace with your actual API keys\n",
|
||||
"os.environ[\"FMP_API_KEY\"] = \"your-fmp-api-key\" # pragma: allowlist secret\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"your-openai-api-key\" # pragma: allowlist secret"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b5de291f5c2f67a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eb86baf3da526812",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d4d6a4b9ac69569f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"There are two main ways to instantiate the FMP LangChain integration:\n",
|
||||
"1. Using FMPDataToolkit"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ff5f451182995407",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_fmp_data import FMPDataToolkit\n",
|
||||
"\n",
|
||||
"query = \"Get stock market prices and technical indicators\"\n",
|
||||
"# Basic instantiation\n",
|
||||
"toolkit = FMPDataToolkit(query=query)\n",
|
||||
"\n",
|
||||
"# Instantiation with specific query focus\n",
|
||||
"market_toolkit = FMPDataToolkit(\n",
|
||||
" query=query,\n",
|
||||
" num_results=5,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Instantiation with custom configuration\n",
|
||||
"custom_toolkit = FMPDataToolkit(\n",
|
||||
" query=\"Financial analysis\",\n",
|
||||
" num_results=3,\n",
|
||||
" similarity_threshold=0.4,\n",
|
||||
" cache_dir=\"/custom/cache/path\",\n",
|
||||
")"
|
||||
]
|
||||
},
{
"cell_type": "markdown",
"id": "8cd16450e03ea000",
"metadata": {},
"source": [
"2. Using FMPDataTool"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c924dd0e34b3db9",
"metadata": {},
"outputs": [],
"source": [
"from langchain_fmp_data import FMPDataTool\n",
"from langchain_fmp_data.tools import ResponseFormat\n",
"\n",
"# Basic instantiation\n",
"tool = FMPDataTool()\n",
"\n",
"# Advanced instantiation with custom settings\n",
"advanced_tool = FMPDataTool(\n",
"    max_iterations=50,\n",
"    temperature=0.2,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "499dde5c011b44b6",
"metadata": {},
"source": [
"## Invocation\n",
"The tools can be invoked in several ways:\n",
"\n",
"### Direct Invocation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2fc685c536bdc54",
"metadata": {},
"outputs": [],
"source": [
"# Using FMPDataTool\n",
"tool_direct = FMPDataTool()\n",
"\n",
"# Basic query\n",
"# fmt: off\n",
"result = tool_direct.invoke({\"query\": \"What's Apple's current stock price?\"})\n",
"# fmt: on\n",
"\n",
"# Advanced query with specific format\n",
"# fmt: off\n",
"detailed_result = tool_direct.invoke(\n",
"    {\n",
"        \"query\": \"Compare Tesla and Ford's profit margins\",\n",
"        \"response_format\": ResponseFormat.BOTH,\n",
"    }\n",
")\n",
"# fmt: on"
]
},
{
"cell_type": "markdown",
"id": "3735e50bdeb55c4",
"metadata": {},
"source": [
"### Using with LangChain Agents"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73b3684edd3ddbce",
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import AgentExecutor, create_openai_functions_agent\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"# Setup\n",
"llm = ChatOpenAI(temperature=0)\n",
"toolkit = FMPDataToolkit(\n",
"    query=\"Stock analysis\",\n",
"    num_results=3,\n",
")\n",
"tools = toolkit.get_tools()\n",
"\n",
"# Create agent (create_openai_functions_agent expects a prompt template,\n",
"# not a plain string, and it must include an agent_scratchpad placeholder)\n",
"prompt = ChatPromptTemplate.from_messages(\n",
"    [\n",
"        (\"system\", \"You are a helpful assistant. Answer the user's questions based on the provided context.\"),\n",
"        (\"human\", \"{input}\"),\n",
"        (\"placeholder\", \"{agent_scratchpad}\"),\n",
"    ]\n",
")\n",
"agent = create_openai_functions_agent(llm, tools, prompt)\n",
"agent_executor = AgentExecutor(\n",
"    agent=agent,\n",
"    tools=tools,\n",
")\n",
"\n",
"# Run query\n",
"# fmt: off\n",
"response = agent_executor.invoke({\"input\": \"What's the PE ratio of Microsoft?\"})\n",
"# fmt: on"
]
},
{
"cell_type": "markdown",
"id": "73654bed2bd79c50",
"metadata": {},
"source": [
"## Advanced Usage\n",
"You can customize the tool's behavior:\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20fa3c2ed5204299",
"metadata": {},
"outputs": [],
"source": [
"# Initialize with custom settings\n",
"advanced_tool = FMPDataTool(\n",
"    max_iterations=50,  # Increase max iterations for complex queries\n",
"    temperature=0.2,  # Adjust temperature for more/less focused responses\n",
")\n",
"\n",
"# Example of a complex multi-part analysis\n",
"query = \"\"\"\n",
"Analyze Apple's financial health by:\n",
"1. Examining current ratios and debt levels\n",
"2. Comparing profit margins to industry average\n",
"3. Looking at cash flow trends\n",
"4. Assessing growth metrics\n",
"\"\"\"\n",
"# fmt: off\n",
"response = advanced_tool.invoke(\n",
"    {\n",
"        \"query\": query,\n",
"        \"response_format\": ResponseFormat.BOTH,\n",
"    }\n",
")\n",
"# fmt: on\n",
"print(\"Detailed Financial Analysis:\")\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"id": "f2d5e31becc88d70",
"metadata": {},
"source": [
"## Chaining\n",
"You can chain the tool like any other tool by composing it with your desired model."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad1d8e8575c7bc7",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"# Setup\n",
"llm = ChatOpenAI(temperature=0)\n",
"toolkit = FMPDataToolkit(query=\"Stock analysis\", num_results=3)\n",
"tools = toolkit.get_tools()\n",
"\n",
"# Bind the tools to the model and parse the text output\n",
"llm_with_tools = llm.bind_tools(tools)\n",
"output_parser = StrOutputParser()\n",
"# Create chain\n",
"runner = llm_with_tools | output_parser\n",
"\n",
"# Run chain\n",
"response = runner.invoke(\"What's the PE ratio of Microsoft?\")"
]
},
{
"cell_type": "markdown",
"id": "2fe9b99e6bd5d3bb",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"### FMPDataToolkit\n",
"Main class for creating collections of FMP API tools:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd71cf7dda4e1579",
"metadata": {},
"outputs": [],
"source": [
"from typing import Any\n",
"\n",
"from langchain.tools import Tool\n",
"\n",
"\n",
"class FMPDataToolkit:\n",
"    \"\"\"Creates a collection of FMP data tools based on queries.\"\"\"\n",
"\n",
"    def __init__(\n",
"        self,\n",
"        query: str | None = None,\n",
"        num_results: int = 3,\n",
"        similarity_threshold: float = 0.3,\n",
"        cache_dir: str | None = None,\n",
"    ): ...\n",
"\n",
"    def get_tools(self) -> list[Tool]:\n",
"        \"\"\"Returns a list of relevant FMP API tools based on the query.\"\"\"\n",
"        ..."
]
},
{
"cell_type": "markdown",
"id": "8fe43c9a7cf7216c",
"metadata": {},
"source": [
"### FMPDataTool\n",
"Unified tool that automatically selects appropriate FMP endpoints:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5fbd891b3798e529",
"metadata": {},
"outputs": [],
"source": [
"# fmt: off\n",
"class FMPDataTool:\n",
"    \"\"\"Single unified tool for accessing FMP data through natural language.\"\"\"\n",
"\n",
"    def __init__(\n",
"        self,\n",
"        max_iterations: int = 3,\n",
"        temperature: float = 0.0,\n",
"    ): ...\n",
"\n",
"    def invoke(\n",
"        self,\n",
"        input: dict[str, Any],\n",
"    ) -> str | dict[str, Any]:\n",
"        \"\"\"Execute a natural language query against FMP API.\"\"\"\n",
"        ...\n",
"\n",
"# fmt: on"
]
},
{
"cell_type": "markdown",
"id": "6b336afd0cdf2bd5",
"metadata": {},
"source": [
"### ResponseFormat\n",
"Enum for controlling response format:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb57dbcb88d8d118",
"metadata": {},
"outputs": [],
"source": [
"from enum import Enum\n",
"\n",
"\n",
"class ResponseFormat(str, Enum):\n",
"    RAW = \"raw\"  # Raw API response\n",
"    ANALYSIS = \"text\"  # Natural language analysis\n",
"    BOTH = \"both\"  # Both raw data and analysis"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -44,8 +44,8 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
]
},
{
@@ -82,8 +82,8 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
]
},
{
@@ -91,8 +91,8 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
]
},
{
@@ -71,8 +71,8 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
]
},
{
@@ -86,7 +86,7 @@
"\n",
"Optionally you can set the following environment variables:\n",
"\n",
"- `LANGCHAIN_TRACING_V2=true`: To enable LangSmith log run tracing that can also be bound to respective Action Server action run logs. See [LangSmith documentation](https://docs.smith.langchain.com/tracing#log-runs) for more.\n",
"- `LANGSMITH_TRACING=true`: To enable LangSmith log run tracing that can also be bound to respective Action Server action run logs. See [LangSmith documentation](https://docs.smith.langchain.com/tracing#log-runs) for more.\n",
"\n",
"## Usage\n",
"\n",
@@ -106,8 +106,8 @@
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
]
},
{
@@ -97,8 +97,8 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
]
},
{
@@ -82,8 +82,8 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
]
},
{
@@ -74,8 +74,8 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
]
},
{
@@ -74,8 +74,8 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
]
},
{
@@ -55,8 +55,8 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
]
},
{
327
docs/docs/integrations/vectorstores/lindorm.ipynb
Normal file
@@ -0,0 +1,327 @@
{
"cells": [
{
"cell_type": "raw",
"id": "1957f5cb",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Lindorm\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "ef1f0986",
"metadata": {},
"source": [
"# LindormVectorStore\n",
"\n",
"This notebook covers how to get started with the Lindorm vector store."
]
},
{
"cell_type": "markdown",
"id": "36fdc060",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"To access Lindorm vector stores you'll need to create a Lindorm account, get the AK/SK credentials, and install the `langchain-lindorm-integration` package."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "beaec7d673c569f2",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU \"langchain-lindorm-integration\""
]
},
{
"cell_type": "markdown",
"id": "9695dee7",
"metadata": {},
"source": [
"### Credentials\n",
"\n",
"Head [here](https://help.aliyun.com/document_detail/2773369.html?spm=a2c4g.11186623.help-menu-172543.d_2_5_0.2a383f96gr5N3M&scm=20140722.H_2773369._.OR_help-T_cn~zh-V_1) to sign up for Lindorm and generate the AK/SK."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "894c30e4",
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-08T03:05:01.792898Z",
"start_time": "2025-01-08T03:05:01.786082Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"\n",
"class Config:\n",
"    SEARCH_ENDPOINT = os.environ.get(\"SEARCH_ENDPOINT\", \"SEARCH_ENDPOINT\")\n",
"    SEARCH_USERNAME = os.environ.get(\"SEARCH_USERNAME\", \"root\")\n",
"    SEARCH_PWD = os.environ.get(\"SEARCH_PASSWORD\", \"<PASSWORD>\")\n",
"    AI_LLM_ENDPOINT = os.environ.get(\"AI_ENDPOINT\", \"<AI_ENDPOINT>\")\n",
"    AI_USERNAME = os.environ.get(\"AI_USERNAME\", \"root\")\n",
"    AI_PWD = os.environ.get(\"AI_PASSWORD\", \"<PASSWORD>\")\n",
"    AI_DEFAULT_EMBEDDING_MODEL = \"bge_m3_model\"  # set to your model"
]
},
{
"cell_type": "markdown",
"id": "93df377e",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"Here we use the embedding model deployed on the Lindorm AI Service."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "dc37144c-208d-4ab3-9f3a-0407a69fe052",
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-08T03:05:06.907334Z",
"start_time": "2025-01-08T03:05:02.922523Z"
},
"tags": []
},
"outputs": [],
"source": [
"from langchain_lindorm_integration.embeddings import LindormAIEmbeddings\n",
"from langchain_lindorm_integration.vectorstores import LindormVectorStore\n",
"\n",
"embeddings = LindormAIEmbeddings(\n",
"    endpoint=Config.AI_LLM_ENDPOINT,\n",
"    username=Config.AI_USERNAME,\n",
"    password=Config.AI_PWD,\n",
"    model_name=Config.AI_DEFAULT_EMBEDDING_MODEL,\n",
")\n",
"\n",
"index = \"test_index\"\n",
"vector = embeddings.embed_query(\"hello world\")\n",
"dimension = len(vector)\n",
"vector_store = LindormVectorStore(\n",
"    lindorm_search_url=Config.SEARCH_ENDPOINT,\n",
"    embedding=embeddings,\n",
"    http_auth=(Config.SEARCH_USERNAME, Config.SEARCH_PWD),\n",
"    dimension=dimension,\n",
"    embeddings=embeddings,\n",
"    index_name=index,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "ac6071d4",
"metadata": {},
"source": [
"## Manage vector store\n",
"\n",
"### Add items to vector store\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "17f5efc0",
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-08T03:05:18.586696Z",
"start_time": "2025-01-08T03:05:10.531559Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['1', '2', '3']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.documents import Document\n",
"\n",
"document_1 = Document(page_content=\"foo\", metadata={\"source\": \"https://example.com\"})\n",
"\n",
"document_2 = Document(page_content=\"bar\", metadata={\"source\": \"https://example.com\"})\n",
"\n",
"document_3 = Document(page_content=\"baz\", metadata={\"source\": \"https://example.com\"})\n",
"\n",
"documents = [document_1, document_2, document_3]\n",
"\n",
"vector_store.add_documents(documents=documents, ids=[\"1\", \"2\", \"3\"])"
]
},
{
"cell_type": "markdown",
"id": "dcf1b905",
"metadata": {},
"source": [
"### Delete items from vector store\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ef61e188",
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-08T03:06:14.829559Z",
"start_time": "2025-01-08T03:06:14.323751Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{'took': 400,\n",
" 'timed_out': False,\n",
" 'total': 1,\n",
" 'deleted': 1,\n",
" 'batches': 1,\n",
" 'version_conflicts': 0,\n",
" 'noops': 0,\n",
" 'retries': {'bulk': 0, 'search': 0},\n",
" 'throttled_millis': 0,\n",
" 'requests_per_second': -1.0,\n",
" 'throttled_until_millis': 0,\n",
" 'failures': []}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vector_store.delete(ids=[\"3\"])"
]
},
{
"cell_type": "markdown",
"id": "c3620501",
"metadata": {},
"source": [
"## Query vector store\n",
"\n",
"Once your vector store has been created and the relevant documents have been added, you will most likely wish to query it while running your chain or agent.\n",
"\n",
"### Query directly\n",
"\n",
"Performing a simple similarity search can be done as follows:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "aa0a16fa",
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-08T03:07:47.248796Z",
"start_time": "2025-01-08T03:07:46.939607Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* foo [{'source': 'https://example.com'}]\n"
]
}
],
"source": [
"results = vector_store.similarity_search(query=\"thud\", k=1)\n",
"for doc in results:\n",
"    print(f\"* {doc.page_content} [{doc.metadata}]\")"
]
},
{
"cell_type": "markdown",
"id": "3ed9d733",
"metadata": {},
"source": [
"If you want to execute a similarity search and receive the corresponding scores, you can run:\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5efd2eaa",
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-08T03:08:02.758719Z",
"start_time": "2025-01-08T03:08:02.501683Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* [SIM=0.671268] foo [{'source': 'https://example.com'}]\n"
]
}
],
"source": [
"results = vector_store.similarity_search_with_score(query=\"thud\", k=1)\n",
"for doc, score in results:\n",
"    print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
]
},
{
"cell_type": "markdown",
"id": "901c75dc",
"metadata": {},
"source": [
"## Usage for retrieval-augmented generation\n",
"\n",
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
"\n",
"- [Tutorials](/docs/tutorials/)\n",
"- [How-to: Question and answer with RAG](https://python.langchain.com/docs/how_to/#qa-with-rag)\n",
"- [Retrieval conceptual docs](https://python.langchain.com/docs/concepts/#retrieval)"
]
},
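{
"cell_type": "markdown",
"id": "9f8e7d6c5b4a3921",
"metadata": {},
"source": [
"You can also expose the store to a chain or agent through the standard retriever interface. The snippet below is a minimal sketch using the generic `as_retriever` method that LangChain vector stores provide; the `search_kwargs` value shown is illustrative."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e7d6c5b4a392109",
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch: wrap the vector store in the standard retriever interface.\n",
"# k=1 is illustrative; tune it for your use case.\n",
"retriever = vector_store.as_retriever(search_kwargs={\"k\": 1})\n",
"retriever.invoke(\"thud\")"
]
},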
{
"cell_type": "markdown",
"id": "8a27244f",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all LindormVectorStore features and configurations, head to [the API reference](https://pypi.org/project/langchain-lindorm-integration/)."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -130,8 +130,8 @@
"After you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
"\n",
"```shell\n",
"export LANGCHAIN_TRACING_V2=\"true\"\n",
"export LANGCHAIN_API_KEY=\"...\"\n",
"export LANGSMITH_TRACING=\"true\"\n",
"export LANGSMITH_API_KEY=\"...\"\n",
"```\n",
"\n",
"Or, if in a notebook, you can set them with:\n",
@@ -140,8 +140,8 @@
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"```\n",
"\n",
"### Tavily\n",
@@ -88,8 +88,8 @@
"After you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
"\n",
"```shell\n",
"export LANGCHAIN_TRACING_V2=\"true\"\n",
"export LANGCHAIN_API_KEY=\"...\"\n",
"export LANGSMITH_TRACING=\"true\"\n",
"export LANGSMITH_API_KEY=\"...\"\n",
"```\n",
"\n",
"Or, if in a notebook, you can set them with:\n",
@@ -98,8 +98,8 @@
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"```\n",
"\n",
"## Quickstart\n",
@@ -49,7 +49,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-core"
"pip install --upgrade --quiet langchain-core"
]
},
{
@@ -65,8 +65,8 @@
"After you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
"\n",
"```shell\n",
"export LANGCHAIN_TRACING_V2=\"true\"\n",
"export LANGCHAIN_API_KEY=\"...\"\n",
"export LANGSMITH_TRACING=\"true\"\n",
"export LANGSMITH_API_KEY=\"...\"\n",
"```\n",
"\n",
"Or, if in a notebook, you can set them with:\n",
@@ -75,8 +75,8 @@
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"```"
]
},
@@ -76,8 +76,8 @@
"    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")\n",
"\n",
"# Uncomment the below to use LangSmith. Not required.\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\""
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
@@ -65,8 +65,8 @@
"After you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
"\n",
"```shell\n",
"export LANGCHAIN_TRACING_V2=\"true\"\n",
"export LANGCHAIN_API_KEY=\"...\"\n",
"export LANGSMITH_TRACING=\"true\"\n",
"export LANGSMITH_API_KEY=\"...\"\n",
"```\n",
"\n",
"Or, if in a notebook, you can set them with:\n",
@@ -75,8 +75,8 @@
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"```"
]
},
@@ -155,9 +155,9 @@
"Note that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
"\n",
"```python\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"if not os.environ.get(\"LANGCHAIN_API_KEY\"):\n",
"    os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"if not os.environ.get(\"LANGSMITH_API_KEY\"):\n",
"    os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"```"
]
},
@@ -104,8 +104,8 @@
"After you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
"\n",
"```shell\n",
"export LANGCHAIN_TRACING_V2=\"true\"\n",
"export LANGCHAIN_API_KEY=\"...\"\n",
"export LANGSMITH_TRACING=\"true\"\n",
"export LANGSMITH_API_KEY=\"...\"\n",
"```\n",
"\n",
"Or, if in a notebook, you can set them with:\n",
@@ -114,8 +114,8 @@
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"```\n",
"\n",
"## Components\n",
@@ -55,8 +55,8 @@
"After you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
"\n",
"```shell\n",
"export LANGCHAIN_TRACING_V2=\"true\"\n",
"export LANGCHAIN_API_KEY=\"...\"\n",
"export LANGSMITH_TRACING=\"true\"\n",
"export LANGSMITH_API_KEY=\"...\"\n",
"```\n",
"\n",
"Or, if in a notebook, you can set them with:\n",
@@ -65,8 +65,8 @@
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"```\n",
"\n",
"\n",
@@ -57,9 +57,9 @@
"source": [
"```python\n",
"# Comment out the below to opt-out of using LangSmith in this notebook. Not required.\n",
"if not os.environ.get(\"LANGCHAIN_API_KEY\"):\n",
"    os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"    os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"if not os.environ.get(\"LANGSMITH_API_KEY\"):\n",
"    os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"    os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"```"
]
},
@@ -100,8 +100,8 @@
"After you sign up at the link above, make sure to set your environment variables to start logging traces:\n",
"\n",
"```shell\n",
"export LANGCHAIN_TRACING_V2=\"true\"\n",
"export LANGCHAIN_API_KEY=\"...\"\n",
"export LANGSMITH_TRACING=\"true\"\n",
"export LANGSMITH_API_KEY=\"...\"\n",
"```\n",
"\n",
"Or, if in a notebook, you can set them with:\n",
@@ -110,8 +110,8 @@
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
"```"
]
},
@@ -173,7 +173,7 @@
"source": [
"import os\n",
"\n",
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\""
"os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
@@ -86,14 +86,6 @@ CODE_INTERPRETER_TOOL_FEAT_TABLE = {
        "link": "/docs/integrations/tools/riza",
        "self_hosting": True,
    },
    "E2B Data Analysis": {
        "langauges": "Python. In beta: JavaScript, R, Java",
        "sandbox_lifetime": "24 Hours",
        "upload": True,
        "return_results": "Text, Images, Videos",
        "link": "/docs/integrations/tools/e2b_data_analysis",
        "self_hosting": True,
    },
    "Azure Container Apps dynamic sessions": {
        "langauges": "Python",
        "sandbox_lifetime": "1 Hour",
Some files were not shown because too many files have changed in this diff