mirror of https://github.com/hwchase17/langchain.git
synced 2025-04-28 20:05:58 +00:00

Compare commits: 32 commits (branch: master)
Commits (32):

7e926520d5
d614842d23
ff1602f0fd
aee7988a94
a2863f8757
3fb0a55122
5fb8fd863a
79a537d308
ba2518995d
04a899ebe3
a82d987f09
a60fd06784
629b7a5a43
ab871a7b39
d30c56a8c1
09c1991e96
a7903280dd
d0f0d1f966
403fae8eec
d6b50ad3f6
10a9c24dae
8fc7a723b9
f4863f82e2
ae4b6380d9
ffbc64c72a
6b0b317cb5
21962e2201
1eb0bdadfa
7ecdac5240
faef3e5d50
d4fc734250
4bc70766b5
.github/scripts/check_diff.py (vendored): 2 changes
@@ -38,8 +38,8 @@ IGNORED_PARTNERS = [
 ]
 
 PY_312_MAX_PACKAGES = [
     "libs/partners/huggingface",  # https://github.com/pytorch/pytorch/issues/130249
     "libs/partners/voyageai",
+    "libs/partners/chroma",  # https://github.com/chroma-core/chroma/issues/4382
 ]
-
.github/scripts/prep_api_docs_build.py (vendored): 4 changes
@@ -69,7 +69,7 @@ def main():
         clean_target_directories([
             p
             for p in package_yaml["packages"]
-            if p["repo"].startswith("langchain-ai/")
+            if (p["repo"].startswith("langchain-ai/") or p.get("include_in_api_ref"))
             and p["repo"] != "langchain-ai/langchain"
         ])

@@ -78,7 +78,7 @@ def main():
             p
             for p in package_yaml["packages"]
             if not p.get("disabled", False)
-            and p["repo"].startswith("langchain-ai/")
+            and (p["repo"].startswith("langchain-ai/") or p.get("include_in_api_ref"))
             and p["repo"] != "langchain-ai/langchain"
         ])
.github/workflows/api_doc_build.yml (vendored): 23 changes
@@ -26,7 +26,20 @@ jobs:
         id: get-unsorted-repos
         uses: mikefarah/yq@master
         with:
-          cmd: yq '.packages[].repo' langchain/libs/packages.yml
+          cmd: |
+            yq '
+              .packages[]
+              | select(
+                  (
+                    (.repo | test("^langchain-ai/"))
+                    and
+                    (.repo != "langchain-ai/langchain")
+                  )
+                  or
+                  (.include_in_api_ref // false)
+                )
+              | .repo
+            ' langchain/libs/packages.yml

       - name: Parse YAML and checkout repos
         env:

@@ -38,11 +51,9 @@ jobs:

           # Checkout each unique repository that is in langchain-ai org
           for repo in $REPOS; do
-            if [[ "$repo" != "langchain-ai/langchain" && "$repo" == langchain-ai/* ]]; then
-              REPO_NAME=$(echo $repo | cut -d'/' -f2)
-              echo "Checking out $repo to $REPO_NAME"
-              git clone --depth 1 https://github.com/$repo.git $REPO_NAME
-            fi
+            REPO_NAME=$(echo $repo | cut -d'/' -f2)
+            echo "Checking out $repo to $REPO_NAME"
+            git clone --depth 1 https://github.com/$repo.git $REPO_NAME
           done

       - name: Setup python ${{ env.PYTHON_VERSION }}
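Both changes above apply the same selection rule: keep packages that either live under the langchain-ai org (excluding the langchain monorepo itself) or opt in explicitly via `include_in_api_ref`. A minimal Python sketch of that predicate, using PyYAML over a hypothetical `packages.yml` excerpt (the entry values are illustrative, not taken from the real file):

```python
import yaml

# Hypothetical excerpt mirroring the shape of libs/packages.yml.
SAMPLE = """
packages:
  - name: langchain-core
    repo: langchain-ai/langchain
  - name: langchain-google-genai
    repo: langchain-ai/langchain-google
  - name: langchain-astradb
    repo: datastax/astrapy          # external repo, opted in below
    include_in_api_ref: true
"""


def selected(pkg: dict) -> bool:
    # Same logic as the yq filter and the prep_api_docs_build.py change:
    # in-org repos (except the monorepo) OR explicit include_in_api_ref.
    in_org = pkg["repo"].startswith("langchain-ai/")
    opted_in = pkg.get("include_in_api_ref", False)
    return (in_org or opted_in) and pkg["repo"] != "langchain-ai/langchain"


repos = [p["repo"] for p in yaml.safe_load(SAMPLE)["packages"] if selected(p)]
print(repos)  # ['langchain-ai/langchain-google', 'datastax/astrapy']
```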
@@ -107,7 +107,7 @@ outputs will appear as part of the [AIMessage](/docs/concepts/messages/#aimessage)
 response object. See for example:

 - Generating [audio outputs](/docs/integrations/chat/openai/#audio-generation-preview) with OpenAI;
-- Generating [image outputs](/docs/integrations/chat/google_generative_ai/#image-generation) with Google Gemini.
+- Generating [image outputs](/docs/integrations/chat/google_generative_ai/#multimodal-usage) with Google Gemini.

 #### Tools
@@ -1,35 +1,26 @@
 {
  "cells": [
   {
-   "cell_type": "raw",
-   "id": "afaf8039",
+   "cell_type": "markdown",
+   "id": "d982c99f",
    "metadata": {},
    "source": [
     "---\n",
-    "sidebar_label: Google AI\n",
+    "sidebar_label: Google Gemini\n",
     "---"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "e49f1e0d",
+   "id": "56a6d990",
    "metadata": {},
    "source": [
     "# ChatGoogleGenerativeAI\n",
     "\n",
-    "This docs will help you get started with Google AI [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatGoogleGenerativeAI features and configurations head to the [API reference](https://python.langchain.com/api_reference/google_genai/chat_models/langchain_google_genai.chat_models.ChatGoogleGenerativeAI.html).\n",
+    "Access Google's Generative AI models, including the Gemini family, directly via the Gemini API or experiment rapidly using Google AI Studio. The `langchain-google-genai` package provides the LangChain integration for these models. This is often the best starting point for individual developers.\n",
     "\n",
-    "Google AI offers a number of different chat models. For information on the latest models, their features, context windows, etc. head to the [Google AI docs](https://ai.google.dev/gemini-api/docs/models/gemini).\n",
+    "For information on the latest models, their features, context windows, etc. head to the [Google AI docs](https://ai.google.dev/gemini-api/docs/models/gemini). All examples use the `gemini-2.0-flash` model. Gemini 2.5 Pro and 2.5 Flash can be used via `gemini-2.5-pro-preview-03-25` and `gemini-2.5-flash-preview-04-17`. All model ids can be found in the [Gemini API docs](https://ai.google.dev/gemini-api/docs/models).\n",
     "\n",
     ":::info Google AI vs Google Cloud Vertex AI\n",
     "\n",
     "Google's Gemini models are accessible through Google AI and through Google Cloud Vertex AI. Using Google AI just requires a Google account and an API key. Using Google Cloud Vertex AI requires a Google Cloud account (with term agreements and billing) but offers enterprise features like customer encryption key, virtual private cloud, and more.\n",
     "\n",
     "To learn more about the key features of the two APIs see the [Google docs](https://cloud.google.com/vertex-ai/generative-ai/docs/migrate/migrate-google-ai#google-ai).\n",
     "\n",
     ":::\n",
     "\n",
     "## Overview\n",
     "### Integration details\n",
     "\n",
     "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/google_generativeai) | Package downloads | Package latest |\n",
@@ -37,23 +28,46 @@
 "| [ChatGoogleGenerativeAI](https://python.langchain.com/api_reference/google_genai/chat_models/langchain_google_genai.chat_models.ChatGoogleGenerativeAI.html) | [langchain-google-genai](https://python.langchain.com/api_reference/google_genai/index.html) | ❌ | beta | ✅ |  |  |\n",
 "\n",
 "### Model features\n",
 "\n",
 "| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
 "| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
 "| ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |\n",
 "\n",
-"## Setup\n",
+"### Setup\n",
 "\n",
-"To access Google AI models you'll need to create a Google Acount account, get a Google AI API key, and install the `langchain-google-genai` integration package.\n",
+"To access Google AI models you'll need to create a Google Account, get a Google AI API key, and install the `langchain-google-genai` integration package.\n",
 "\n",
-"### Credentials\n",
-"\n",
-"Head to https://ai.google.dev/gemini-api/docs/api-key to generate a Google AI API key. Once you've done this set the GOOGLE_API_KEY environment variable:"
+"**1. Installation:**"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
-"id": "433e8d2b-9519-4b49-b2c4-7ab65b046c94",
+"id": "8d12ce35",
 "metadata": {},
 "outputs": [],
 "source": [
+"%pip install -U langchain-google-genai"
+]
+},
+{
+"cell_type": "markdown",
+"id": "60be0b38",
+"metadata": {},
+"source": [
+"**2. Credentials:**\n",
+"\n",
+"Head to [https://ai.google.dev/gemini-api/docs/api-key](https://ai.google.dev/gemini-api/docs/api-key) (or via Google AI Studio) to generate a Google AI API key.\n",
+"\n",
+"### Chat Models\n",
+"\n",
+"Use the `ChatGoogleGenerativeAI` class to interact with Google's chat models. See the [API reference](https://python.langchain.com/api_reference/google_genai/chat_models/langchain_google_genai.chat_models.ChatGoogleGenerativeAI.html) for full details.\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 3,
+"id": "fb18c875",
+"metadata": {},
+"outputs": [],
+"source": [
@@ -66,7 +80,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "72ee0c4b-9764-423a-9dbf-95129e185210",
+ "id": "f050e8db",
  "metadata": {},
  "source": [
   "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
@@ -75,7 +89,7 @@
 {
  "cell_type": "code",
  "execution_count": null,
- "id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
+ "id": "82cb346f",
  "metadata": {},
  "outputs": [],
  "source": [
@@ -85,27 +99,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "0730d6a1-c893-4840-9817-5e5251676d5d",
- "metadata": {},
- "source": [
-  "### Installation\n",
-  "\n",
-  "The LangChain Google AI integration lives in the `langchain-google-genai` package:"
- ]
-},
-{
- "cell_type": "code",
- "execution_count": null,
- "id": "652d6238-1f87-422a-b135-f5abbb8652fc",
- "metadata": {},
- "outputs": [],
- "source": [
-  "%pip install -qU langchain-google-genai"
- ]
-},
-{
- "cell_type": "markdown",
- "id": "a38cde65-254d-4219-a441-068766c0d4b5",
+ "id": "273cefa0",
  "metadata": {},
  "source": [
   "## Instantiation\n",
@@ -115,15 +109,15 @@
 },
 {
  "cell_type": "code",
- "execution_count": 2,
- "id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
+ "execution_count": 4,
+ "id": "7d3dc0b3",
  "metadata": {},
  "outputs": [],
  "source": [
   "from langchain_google_genai import ChatGoogleGenerativeAI\n",
   "\n",
   "llm = ChatGoogleGenerativeAI(\n",
-  "    model=\"gemini-2.0-flash-001\",\n",
+  "    model=\"gemini-2.0-flash\",\n",
   "    temperature=0,\n",
   "    max_tokens=None,\n",
   "    timeout=None,\n",
@@ -134,7 +128,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "2b4f3e15",
+ "id": "343a8c13",
  "metadata": {},
  "source": [
   "## Invocation"
@@ -142,19 +136,17 @@
 },
 {
  "cell_type": "code",
- "execution_count": 3,
- "id": "62e0dbc3",
- "metadata": {
-  "tags": []
- },
+ "execution_count": 5,
+ "id": "82c5708c",
+ "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-     "AIMessage(content=\"J'adore la programmation.\", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash-001', 'safety_ratings': []}, id='run-61cff164-40be-4f88-a2df-cca58297502f-0', usage_metadata={'input_tokens': 20, 'output_tokens': 7, 'total_tokens': 27, 'input_token_details': {'cache_read': 0}})"
+     "AIMessage(content=\"J'adore la programmation.\", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-3b28d4b8-8a62-4e6c-ad4e-b53e6e825749-0', usage_metadata={'input_tokens': 20, 'output_tokens': 7, 'total_tokens': 27, 'input_token_details': {'cache_read': 0}})"
    ]
   },
-  "execution_count": 3,
+  "execution_count": 5,
   "metadata": {},
   "output_type": "execute_result"
  }
@@ -173,8 +165,8 @@
 },
 {
  "cell_type": "code",
- "execution_count": 4,
- "id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
+ "execution_count": 6,
+ "id": "49d2d0c2",
  "metadata": {},
  "outputs": [
   {
@@ -191,7 +183,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
+ "id": "ee3f6e1d",
  "metadata": {},
  "source": [
   "## Chaining\n",
@@ -201,17 +193,17 @@
 },
 {
  "cell_type": "code",
- "execution_count": 5,
- "id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
+ "execution_count": 7,
+ "id": "3c8407ee",
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-     "AIMessage(content='Ich liebe Programmieren.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash-001', 'safety_ratings': []}, id='run-dd2f8fb9-62d9-4b84-9c97-ed9c34cda313-0', usage_metadata={'input_tokens': 15, 'output_tokens': 7, 'total_tokens': 22, 'input_token_details': {'cache_read': 0}})"
+     "AIMessage(content='Ich liebe Programmieren.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-e5561c6b-2beb-4411-9210-4796b576a7cd-0', usage_metadata={'input_tokens': 15, 'output_tokens': 7, 'total_tokens': 22, 'input_token_details': {'cache_read': 0}})"
    ]
   },
-  "execution_count": 5,
+  "execution_count": 7,
   "metadata": {},
   "output_type": "execute_result"
  }
@@ -241,22 +233,164 @@
 },
 {
  "cell_type": "markdown",
- "id": "41c2ff10-a3ba-4f40-b3aa-7a395854849e",
+ "id": "bdae9742",
  "metadata": {},
  "source": [
-  "## Image generation\n",
+  "## Multimodal Usage\n",
   "\n",
-  "Some Gemini models (specifically `gemini-2.0-flash-exp`) support image generation capabilities.\n",
+  "Gemini models can accept multimodal inputs (text, images, audio, video) and, for some models, generate multimodal outputs.\n",
   "\n",
-  "### Text to image\n",
+  "### Image Input\n",
   "\n",
-  "See a simple usage example below:"
+  "Provide image inputs along with text using a `HumanMessage` with a list content format. The `gemini-2.0-flash` model can handle images."
 ]
},
{
 "cell_type": "code",
- "execution_count": 2,
- "id": "7589e14d-8d1b-4c82-965f-5558d80cb677",
+ "execution_count": null,
+ "id": "6833fe5d",
 "metadata": {},
 "outputs": [],
 "source": [
+ "import base64\n",
+ "\n",
+ "from langchain_core.messages import HumanMessage\n",
+ "from langchain_google_genai import ChatGoogleGenerativeAI\n",
+ "\n",
+ "# Example using a public URL (remains the same)\n",
+ "message_url = HumanMessage(\n",
+ "    content=[\n",
+ "        {\n",
+ "            \"type\": \"text\",\n",
+ "            \"text\": \"Describe the image at the URL.\",\n",
+ "        },\n",
+ "        {\"type\": \"image_url\", \"image_url\": \"https://picsum.photos/seed/picsum/200/300\"},\n",
+ "    ]\n",
+ ")\n",
+ "result_url = llm.invoke([message_url])\n",
+ "print(f\"Response for URL image: {result_url.content}\")\n",
+ "\n",
+ "# Example using a local image file encoded in base64\n",
+ "image_file_path = \"/Users/philschmid/projects/google-gemini/langchain/docs/static/img/agents_vs_chains.png\"\n",
+ "\n",
+ "with open(image_file_path, \"rb\") as image_file:\n",
+ "    encoded_image = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
+ "\n",
+ "message_local = HumanMessage(\n",
+ "    content=[\n",
+ "        {\"type\": \"text\", \"text\": \"Describe the local image.\"},\n",
+ "        {\"type\": \"image_url\", \"image_url\": f\"data:image/png;base64,{encoded_image}\"},\n",
+ "    ]\n",
+ ")\n",
+ "result_local = llm.invoke([message_local])\n",
+ "print(f\"Response for local image: {result_local.content}\")"
+]
+},
+{
+"cell_type": "markdown",
+"id": "1b422382",
+"metadata": {},
+"source": [
+ "Other supported `image_url` formats:\n",
+ "- A Google Cloud Storage URI (`gs://...`). Ensure the service account has access.\n",
+ "- A PIL Image object (the library handles encoding).\n",
+ "\n",
+ "### Audio Input\n",
+ "\n",
+ "Provide audio file inputs along with text. Use a model like `gemini-2.0-flash`."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "a3461836",
+"metadata": {},
+"outputs": [],
+"source": [
+ "import base64\n",
+ "\n",
+ "from langchain_core.messages import HumanMessage\n",
+ "\n",
+ "# Ensure you have an audio file named 'example_audio.mp3' or provide the correct path.\n",
+ "audio_file_path = \"example_audio.mp3\"\n",
+ "audio_mime_type = \"audio/mpeg\"\n",
+ "\n",
+ "\n",
+ "with open(audio_file_path, \"rb\") as audio_file:\n",
+ "    encoded_audio = base64.b64encode(audio_file.read()).decode(\"utf-8\")\n",
+ "\n",
+ "message = HumanMessage(\n",
+ "    content=[\n",
+ "        {\"type\": \"text\", \"text\": \"Transcribe the audio.\"},\n",
+ "        {\n",
+ "            \"type\": \"media\",\n",
+ "            \"data\": encoded_audio,  # Use base64 string directly\n",
+ "            \"mime_type\": audio_mime_type,\n",
+ "        },\n",
+ "    ]\n",
+ ")\n",
+ "response = llm.invoke([message])  # Uncomment to run\n",
+ "print(f\"Response for audio: {response.content}\")"
+]
+},
+{
+"cell_type": "markdown",
+"id": "0d898e27",
+"metadata": {},
+"source": [
+ "### Video Input\n",
+ "\n",
+ "Provide video file inputs along with text. Use a model like `gemini-2.0-flash`."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "3046e74b",
+"metadata": {},
+"outputs": [],
+"source": [
+ "import base64\n",
+ "\n",
+ "from langchain_core.messages import HumanMessage\n",
+ "from langchain_google_genai import ChatGoogleGenerativeAI\n",
+ "\n",
+ "# Ensure you have a video file named 'example_video.mp4' or provide the correct path.\n",
+ "video_file_path = \"example_video.mp4\"\n",
+ "video_mime_type = \"video/mp4\"\n",
+ "\n",
+ "\n",
+ "with open(video_file_path, \"rb\") as video_file:\n",
+ "    encoded_video = base64.b64encode(video_file.read()).decode(\"utf-8\")\n",
+ "\n",
+ "message = HumanMessage(\n",
+ "    content=[\n",
+ "        {\"type\": \"text\", \"text\": \"Describe the first few frames of the video.\"},\n",
+ "        {\n",
+ "            \"type\": \"media\",\n",
+ "            \"data\": encoded_video,  # Use base64 string directly\n",
+ "            \"mime_type\": video_mime_type,\n",
+ "        },\n",
+ "    ]\n",
+ ")\n",
+ "response = llm.invoke([message])  # Uncomment to run\n",
+ "print(f\"Response for video: {response.content}\")"
+]
+},
+{
+"cell_type": "markdown",
+"id": "2df11d89",
+"metadata": {},
+"source": [
+ "### Image Generation (Multimodal Output)\n",
+ "\n",
+ "The `gemini-2.0-flash` model can generate text and images inline (image generation is experimental). You need to specify the desired `response_modalities`."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "c0b7180f",
+"metadata": {},
+"outputs": [
+{
@@ -266,17 +400,12 @@
     "<IPython.core.display.Image object>"
    ]
   },
-  "metadata": {
-   "image/png": {
-    "width": 300
-   }
-  },
+  "metadata": {},
   "output_type": "display_data"
  }
 ],
 "source": [
  "import base64\n",
- "from io import BytesIO\n",
  "\n",
  "from IPython.display import Image, display\n",
  "from langchain_google_genai import ChatGoogleGenerativeAI\n",
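The body of the generation call is elided by the diff context above. A minimal sketch of what it typically looks like, assuming the experimental image-generation variant of Gemini 2.0 Flash and a data-URI `image_url` block in the response (the model name and content-block layout may differ across `langchain-google-genai` versions):

```python
import base64

from IPython.display import Image, display
from langchain_google_genai import ChatGoogleGenerativeAI

# Assumed experimental model id; check the Gemini API docs for current names.
llm = ChatGoogleGenerativeAI(model="models/gemini-2.0-flash-exp-image-generation")

response = llm.invoke(
    "Generate an image of a cuddly cat wearing a hat.",
    generation_config=dict(response_modalities=["TEXT", "IMAGE"]),
)

# Image parts are returned as data-URI image_url blocks in response.content.
image_block = next(
    block
    for block in response.content
    if isinstance(block, dict) and block.get("image_url")
)
image_b64 = image_block["image_url"].get("url").split(",")[-1]  # strip data: prefix
display(Image(data=base64.b64decode(image_b64), width=300))
```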
@@ -301,7 +430,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "b14c0d87-cf7e-4d88-bda1-2ab40ec0350a",
+ "id": "14bf00f1",
  "metadata": {},
  "source": [
   "### Image and text to image\n",
@@ -311,8 +440,8 @@
 },
 {
  "cell_type": "code",
- "execution_count": 3,
- "id": "0f4ed7a5-980c-4b54-b743-0b988909744c",
+ "execution_count": null,
+ "id": "d65e195c",
  "metadata": {},
  "outputs": [
   {

@@ -322,11 +451,7 @@
     "<IPython.core.display.Image object>"
    ]
   },
-  "metadata": {
-   "image/png": {
-    "width": 300
-   }
-  },
+  "metadata": {},
   "output_type": "display_data"
  }
 ],
@@ -349,7 +474,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "a62669d8-becd-495f-8f4a-82d7c5d87969",
+ "id": "43b54d3f",
  "metadata": {},
  "source": [
   "You can also represent an input image and query in a single message by encoding the base64 data in the [data URI scheme](https://en.wikipedia.org/wiki/Data_URI_scheme):"
@@ -357,8 +482,8 @@
 },
 {
  "cell_type": "code",
- "execution_count": 9,
- "id": "6241da43-e210-43bc-89af-b3c480ea06e9",
+ "execution_count": null,
+ "id": "0dfc7e1e",
  "metadata": {},
  "outputs": [
   {

@@ -368,11 +493,7 @@
     "<IPython.core.display.Image object>"
    ]
   },
-  "metadata": {
-   "image/png": {
-    "width": 300
-   }
-  },
+  "metadata": {},
   "output_type": "display_data"
  }
 ],
@@ -403,7 +524,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "cfe228d3-6773-4283-9788-87bdf6912b1c",
+ "id": "789818d7",
  "metadata": {},
  "source": [
   "You can also use LangGraph to manage the conversation history for you as in [this tutorial](/docs/tutorials/chatbot/)."
@@ -411,7 +532,313 @@
 },
 {
  "cell_type": "markdown",
- "id": "d1ee55bc-ffc8-4cfa-801c-993953a08cfd",
+ "id": "b037e2dc",
+ "metadata": {},
+ "source": [
+  "## Tool Calling\n",
+  "\n",
+  "You can equip the model with tools to call."
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0d759f9",
+ "metadata": {},
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "[{'name': 'get_weather', 'args': {'location': 'San Francisco'}, 'id': 'a6248087-74c5-4b7c-9250-f335e642927c', 'type': 'tool_call'}]\n"
+   ]
+  },
+  {
+   "data": {
+    "text/plain": [
+     "AIMessage(content=\"OK. It's sunny in San Francisco.\", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-ac5bb52c-e244-4c72-9fbc-fb2a9cd7a72e-0', usage_metadata={'input_tokens': 29, 'output_tokens': 11, 'total_tokens': 40, 'input_token_details': {'cache_read': 0}})"
+    ]
+   },
+   "execution_count": 28,
+   "metadata": {},
+   "output_type": "execute_result"
+  }
+ ],
+ "source": [
+  "from langchain_core.tools import tool\n",
+  "from langchain_google_genai import ChatGoogleGenerativeAI\n",
+  "\n",
+  "\n",
+  "# Define the tool\n",
+  "@tool(description=\"Get the current weather in a given location\")\n",
+  "def get_weather(location: str) -> str:\n",
+  "    return \"It's sunny.\"\n",
+  "\n",
+  "\n",
+  "# Initialize the model and bind the tool\n",
+  "llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\")\n",
+  "llm_with_tools = llm.bind_tools([get_weather])\n",
+  "\n",
+  "# Invoke the model with a query that should trigger the tool\n",
+  "query = \"What's the weather in San Francisco?\"\n",
+  "ai_msg = llm_with_tools.invoke(query)\n",
+  "\n",
+  "# Check the tool calls in the response\n",
+  "print(ai_msg.tool_calls)\n",
+  "\n",
+  "# Example tool call message would be needed here if you were actually running the tool\n",
+  "from langchain_core.messages import ToolMessage\n",
+  "\n",
+  "tool_message = ToolMessage(\n",
+  "    content=get_weather(*ai_msg.tool_calls[0][\"args\"]),\n",
+  "    tool_call_id=ai_msg.tool_calls[0][\"id\"],\n",
+  ")\n",
+  "llm_with_tools.invoke([ai_msg, tool_message])  # Example of passing tool result back"
+ ]
+},
+{
+ "cell_type": "markdown",
+ "id": "91d42b86",
+ "metadata": {},
+ "source": [
+  "## Structured Output\n",
+  "\n",
+  "Force the model to respond with a specific structure using Pydantic models."
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "7457dbe4",
+ "metadata": {},
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "name='Abraham Lincoln' height_m=1.93\n"
+   ]
+  }
+ ],
+ "source": [
+  "from langchain_core.pydantic_v1 import BaseModel, Field\n",
+  "from langchain_google_genai import ChatGoogleGenerativeAI\n",
+  "\n",
+  "\n",
+  "# Define the desired structure\n",
+  "class Person(BaseModel):\n",
+  "    \"\"\"Information about a person.\"\"\"\n",
+  "\n",
+  "    name: str = Field(..., description=\"The person's name\")\n",
+  "    height_m: float = Field(..., description=\"The person's height in meters\")\n",
+  "\n",
+  "\n",
+  "# Initialize the model\n",
+  "llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\", temperature=0)\n",
+  "structured_llm = llm.with_structured_output(Person)\n",
+  "\n",
+  "# Invoke the model with a query asking for structured information\n",
+  "result = structured_llm.invoke(\n",
+  "    \"Who was the 16th president of the USA, and how tall was he in meters?\"\n",
+  ")\n",
+  "print(result)"
+ ]
+},
+{
+ "cell_type": "markdown",
+ "id": "90d4725e",
+ "metadata": {},
+ "source": [
+  "\n",
+  "\n",
+  "## Token Usage Tracking\n",
+  "\n",
+  "Access token usage information from the response metadata."
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "edcc003e",
+ "metadata": {},
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "Prompt engineering is the art and science of crafting effective text prompts to elicit desired and accurate responses from large language models.\n",
+    "\n",
+    "Usage Metadata:\n",
+    "{'input_tokens': 10, 'output_tokens': 24, 'total_tokens': 34, 'input_token_details': {'cache_read': 0}}\n"
+   ]
+  }
+ ],
+ "source": [
+  "from langchain_google_genai import ChatGoogleGenerativeAI\n",
+  "\n",
+  "llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\")\n",
+  "\n",
+  "result = llm.invoke(\"Explain the concept of prompt engineering in one sentence.\")\n",
+  "\n",
+  "print(result.content)\n",
+  "print(\"\\nUsage Metadata:\")\n",
+  "print(result.usage_metadata)"
+ ]
+},
+{
+ "cell_type": "markdown",
+ "id": "28950dbc",
+ "metadata": {},
+ "source": [
+  "## Built-in tools\n",
+  "\n",
+  "Google Gemini supports a variety of built-in tools ([google search](https://ai.google.dev/gemini-api/docs/grounding/search-suggestions), [code execution](https://ai.google.dev/gemini-api/docs/code-execution?lang=python)), which can be bound to the model in the usual way."
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dd074816",
+ "metadata": {},
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "The next total solar eclipse visible in the United States will occur on August 23, 2044. However, the path of totality will only pass through Montana, North Dakota, and South Dakota.\n",
+    "\n",
+    "For a total solar eclipse that crosses a significant portion of the continental U.S., you'll have to wait until August 12, 2045. This eclipse will start in California and end in Florida.\n"
+   ]
+  }
+ ],
+ "source": [
+  "from google.ai.generativelanguage_v1beta.types import Tool as GenAITool\n",
+  "\n",
+  "resp = llm.invoke(\n",
+  "    \"When is the next total solar eclipse in US?\",\n",
+  "    tools=[GenAITool(google_search={})],\n",
+  ")\n",
+  "\n",
+  "print(resp.content)"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "6964be2d",
+ "metadata": {},
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "Executable code: print(2*2)\n",
+    "\n",
+    "Code execution result: 4\n",
+    "\n",
+    "2*2 is 4.\n"
+   ]
+  },
+  {
+   "name": "stderr",
+   "output_type": "stream",
+   "text": [
+    "/Users/philschmid/projects/google-gemini/langchain/.venv/lib/python3.9/site-packages/langchain_google_genai/chat_models.py:580: UserWarning: \n",
+    "    ⚠️ Warning: Output may vary each run. \n",
+    "    - 'executable_code': Always present. \n",
+    "    - 'execution_result' & 'image_url': May be absent for some queries. \n",
+    "\n",
+    "    Validate before using in production.\n",
+    "\n",
+    "  warnings.warn(\n"
+   ]
+  }
+ ],
+ "source": [
+  "from google.ai.generativelanguage_v1beta.types import Tool as GenAITool\n",
+  "\n",
+  "resp = llm.invoke(\n",
+  "    \"What is 2*2, use python\",\n",
+  "    tools=[GenAITool(code_execution={})],\n",
+  ")\n",
+  "\n",
+  "for c in resp.content:\n",
+  "    if isinstance(c, dict):\n",
+  "        if c[\"type\"] == \"code_execution_result\":\n",
+  "            print(f\"Code execution result: {c['code_execution_result']}\")\n",
+  "        elif c[\"type\"] == \"executable_code\":\n",
+  "            print(f\"Executable code: {c['executable_code']}\")\n",
+  "    else:\n",
+  "        print(c)"
+ ]
+},
+{
+ "cell_type": "markdown",
+ "id": "a27e6ff4",
+ "metadata": {},
+ "source": [
+  "## Native Async\n",
+  "\n",
+  "Use asynchronous methods for non-blocking calls."
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "c6803e57",
+ "metadata": {},
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "Async Invoke Result: The sky is blue due to a phenomenon called **Rayle...\n",
+    "\n",
+    "Async Stream Result:\n",
+    "The thread is free, it does not wait,\n",
+    "For answers slow, or tasks of fate.\n",
+    "A promise made, a future bright,\n",
+    "It moves ahead, with all its might.\n",
+    "\n",
+    "A callback waits, a signal sent,\n",
+    "When data's read, or job is spent.\n",
+    "Non-blocking code, a graceful dance,\n",
+    "Responsive apps, a fleeting glance.\n",
+    "\n",
+    "Async Batch Results: ['1 + 1 = 2', '2 + 2 = 4']\n"
+   ]
+  }
+ ],
+ "source": [
+  "from langchain_google_genai import ChatGoogleGenerativeAI\n",
+  "\n",
+  "llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\")\n",
+  "\n",
+  "\n",
+  "async def run_async_calls():\n",
+  "    # Async invoke\n",
+  "    result_ainvoke = await llm.ainvoke(\"Why is the sky blue?\")\n",
+  "    print(\"Async Invoke Result:\", result_ainvoke.content[:50] + \"...\")\n",
+  "\n",
+  "    # Async stream\n",
+  "    print(\"\\nAsync Stream Result:\")\n",
+  "    async for chunk in llm.astream(\n",
+  "        \"Write a short poem about asynchronous programming.\"\n",
+  "    ):\n",
+  "        print(chunk.content, end=\"\", flush=True)\n",
+  "    print(\"\\n\")\n",
+  "\n",
+  "    # Async batch\n",
+  "    results_abatch = await llm.abatch([\"What is 1+1?\", \"What is 2+2?\"])\n",
+  "    print(\"Async Batch Results:\", [res.content for res in results_abatch])\n",
+  "\n",
+  "\n",
+  "await run_async_calls()"
+ ]
+},
+{
+ "cell_type": "markdown",
+ "id": "99204b32",
  "metadata": {},
  "source": [
   "## Safety Settings\n",
@@ -421,8 +848,8 @@
 },
 {
  "cell_type": "code",
- "execution_count": 14,
- "id": "238b2f96-e573-4fac-bbf2-7e52ad926833",
+ "execution_count": null,
+ "id": "d4c14039",
  "metadata": {},
  "outputs": [],
  "source": [
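The source of this cell falls outside the diff context. A sketch of typical safety-settings usage with `langchain-google-genai`, assuming the re-exported `HarmCategory` / `HarmBlockThreshold` enums:

```python
from langchain_google_genai import (
    ChatGoogleGenerativeAI,
    HarmBlockThreshold,
    HarmCategory,
)

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    safety_settings={
        # Relax (or tighten) per-category blocking thresholds as needed.
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
    },
)
```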
@@ -442,7 +869,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "5805d40c-deb8-4924-8e72-a294a0482fc9",
+ "id": "dea38fb1",
  "metadata": {},
  "source": [
   "For an enumeration of the categories and thresholds available, see Google's [safety setting types](https://ai.google.dev/api/python/google/generativeai/types/SafetySettingDict)."
@@ -450,7 +877,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
+ "id": "d6d0e853",
  "metadata": {},
  "source": [
   "## API reference\n",
@@ -461,7 +888,7 @@
 ],
 "metadata": {
  "kernelspec": {
-  "display_name": "Python 3 (ipykernel)",
+  "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },

@@ -475,7 +902,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
-  "version": "3.10.4"
+  "version": "3.9.6"
 }
},
"nbformat": 4,
@@ -1413,6 +1413,23 @@
   "second_output_message = llm.invoke(history)"
  ]
 },
+{
+ "cell_type": "markdown",
+ "id": "90c18d18-b25c-4509-a639-bd652b92f518",
+ "metadata": {},
+ "source": [
+  "## Flex processing\n",
+  "\n",
+  "OpenAI offers a variety of [service tiers](https://platform.openai.com/docs/guides/flex-processing). The \"flex\" tier offers cheaper pricing for requests, with the trade-off that responses may take longer and resources might not always be available. This approach is best suited for non-critical tasks, including model testing, data enhancement, or jobs that can be run asynchronously.\n",
+  "\n",
+  "To use it, initialize the model with `service_tier=\"flex\"`:\n",
+  "```python\n",
+  "llm = ChatOpenAI(model=\"o4-mini\", service_tier=\"flex\")\n",
+  "```\n",
+  "\n",
+  "Note that this is a beta feature that is only available for a subset of models. See OpenAI [docs](https://platform.openai.com/docs/guides/flex-processing) for more detail."
+ ]
+},
 {
  "cell_type": "markdown",
  "id": "a796d728-971b-408b-88d5-440015bbb941",

@@ -1420,7 +1437,7 @@
  "source": [
   "## API reference\n",
   "\n",
-  "For detailed documentation of all ChatOpenAI features and configurations head to the API reference: https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html"
+  "For detailed documentation of all ChatOpenAI features and configurations head to the [API reference](https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html)."
  ]
 }
],
@@ -34,33 +34,46 @@
  "id": "juAmbgoWD17u"
 },
 "source": [
-  "The AstraDB Document Loader returns a list of Langchain Documents from an AstraDB database.\n",
+  "The Astra DB Document Loader returns a list of Langchain `Document` objects read from an Astra DB collection.\n",
  "\n",
-  "The Loader takes the following parameters:\n",
+  "The loader takes the following parameters:\n",
  "\n",
-  "* `api_endpoint`: AstraDB API endpoint. Looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
-  "* `token`: AstraDB token. Looks like `AstraCS:6gBhNmsk135....`\n",
+  "* `api_endpoint`: Astra DB API endpoint. Looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
+  "* `token`: Astra DB token. Looks like `AstraCS:aBcD0123...`\n",
  "* `collection_name` : AstraDB collection name\n",
-  "* `namespace`: (Optional) AstraDB namespace\n",
+  "* `namespace`: (Optional) AstraDB namespace (called _keyspace_ in Astra DB)\n",
  "* `filter_criteria`: (Optional) Filter used in the find query\n",
  "* `projection`: (Optional) Projection used in the find query\n",
-  "* `find_options`: (Optional) Options used in the find query\n",
-  "* `nb_prefetched`: (Optional) Number of documents pre-fetched by the loader\n",
+  "* `limit`: (Optional) Maximum number of documents to retrieve\n",
  "* `extraction_function`: (Optional) A function to convert the AstraDB document to the LangChain `page_content` string. Defaults to `json.dumps`\n",
  "\n",
-  "The following metadata is set to the LangChain Documents metadata output:\n",
+  "The loader sets the following metadata for the documents it reads:\n",
  "\n",
  "```python\n",
-  "{\n",
-  "    metadata : {\n",
-  "        \"namespace\": \"...\", \n",
-  "        \"api_endpoint\": \"...\", \n",
-  "        \"collection\": \"...\"\n",
-  "    }\n",
+  "metadata={\n",
+  "    \"namespace\": \"...\", \n",
+  "    \"api_endpoint\": \"...\", \n",
+  "    \"collection\": \"...\"\n",
+  "}\n",
  "```"
 ]
},
{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+  "## Setup"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "!pip install \"langchain-astradb>=0.6,<0.7\""
+ ]
+},
+{
 "attachments": {},
 "cell_type": "markdown",
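As a side note on the `extraction_function` parameter listed above: a minimal sketch of a custom extractor, assuming the `movie_reviews` collection used later in this notebook and the `ASTRA_DB_*` credentials collected below:

```python
import json

from langchain_astradb import AstraDBLoader

# Hypothetical: pull just the review text instead of the default json.dumps
# of the whole Astra DB document; "reviewtext" matches the movie_reviews
# collection used later in this notebook.
loader = AstraDBLoader(
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
    collection_name="movie_reviews",
    limit=5,
    extraction_function=lambda doc: doc.get("reviewtext", json.dumps(doc)),
)
docs = loader.load()
```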
@@ -71,24 +84,43 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
  "metadata": {},
  "outputs": [],
  "source": [
-  "from langchain_community.document_loaders import AstraDBLoader"
+  "from langchain_astradb import AstraDBLoader"
 ]
},
+{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+  "[**API Reference:** `AstraDBLoader`](https://python.langchain.com/api_reference/astradb/document_loaders/langchain_astradb.document_loaders.AstraDBLoader.html#langchain_astradb.document_loaders.AstraDBLoader)"
+ ]
+},
+{
 "cell_type": "code",
- "execution_count": 4,
+ "execution_count": 3,
 "metadata": {
  "ExecuteTime": {
   "end_time": "2024-01-08T12:41:22.643335Z",
   "start_time": "2024-01-08T12:40:57.759116Z"
  },
- "collapsed": false
+ "collapsed": false,
+ "jupyter": {
+  "outputs_hidden": false
+ }
 },
- "outputs": [],
+ "outputs": [
+  {
+   "name": "stdin",
+   "output_type": "stream",
+   "text": [
+    "ASTRA_DB_API_ENDPOINT =  https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",
+    "ASTRA_DB_APPLICATION_TOKEN =  ········\n"
+   ]
+  }
+ ],
 "source": [
  "from getpass import getpass\n",
  "\n",

@@ -98,7 +130,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 4,
  "metadata": {
   "ExecuteTime": {
    "end_time": "2024-01-08T12:42:25.395162Z",

@@ -112,19 +144,22 @@
  "    token=ASTRA_DB_APPLICATION_TOKEN,\n",
  "    collection_name=\"movie_reviews\",\n",
  "    projection={\"title\": 1, \"reviewtext\": 1},\n",
-  "    find_options={\"limit\": 10},\n",
+  "    limit=10,\n",
  ")"
 ]
},
{
 "cell_type": "code",
- "execution_count": 7,
+ "execution_count": 5,
 "metadata": {
  "ExecuteTime": {
   "end_time": "2024-01-08T12:42:30.236489Z",
   "start_time": "2024-01-08T12:42:29.612133Z"
  },
- "collapsed": false
+ "collapsed": false,
+ "jupyter": {
+  "outputs_hidden": false
+ }
 },
 "outputs": [],
 "source": [
@@ -133,7 +168,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 8,
+ "execution_count": 6,
  "metadata": {
   "ExecuteTime": {
    "end_time": "2024-01-08T12:42:31.369394Z",

@@ -144,10 +179,10 @@
  {
   "data": {
    "text/plain": [
-    "Document(page_content='{\"_id\": \"659bdffa16cbc4586b11a423\", \"title\": \"Dangerous Men\", \"reviewtext\": \"\\\\\"Dangerous Men,\\\\\" the picture\\'s production notes inform, took 26 years to reach the big screen. After having seen it, I wonder: What was the rush?\"}', metadata={'namespace': 'default_keyspace', 'api_endpoint': 'https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com', 'collection': 'movie_reviews'})"
+    "Document(metadata={'namespace': 'default_keyspace', 'api_endpoint': 'https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com', 'collection': 'movie_reviews'}, page_content='{\"_id\": \"659bdffa16cbc4586b11a423\", \"title\": \"Dangerous Men\", \"reviewtext\": \"\\\\\"Dangerous Men,\\\\\" the picture\\'s production notes inform, took 26 years to reach the big screen. After having seen it, I wonder: What was the rush?\"}')"
    ]
   },
-  "execution_count": 8,
+  "execution_count": 7,
   "metadata": {},
   "output_type": "execute_result"
  }

@@ -179,7 +214,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
-  "version": "3.9.18"
+  "version": "3.12.8"
 }
},
"nbformat": 4,
@@ -49,7 +49,14 @@
  "metadata": {},
  "outputs": [],
  "source": [
-  "from langchain_community.document_loaders import BrowserbaseLoader"
+  "import os\n",
+  "\n",
+  "from langchain_community.document_loaders import BrowserbaseLoader\n",
+  "\n",
+  "load_dotenv()\n",
+  "\n",
+  "BROWSERBASE_API_KEY = os.getenv(\"BROWSERBASE_API_KEY\")\n",
+  "BROWSERBASE_PROJECT_ID = os.getenv(\"BROWSERBASE_PROJECT_ID\")"
 ]
},
{

@@ -59,6 +66,8 @@
 "outputs": [],
 "source": [
  "loader = BrowserbaseLoader(\n",
+  "    api_key=BROWSERBASE_API_KEY,\n",
+  "    project_id=BROWSERBASE_PROJECT_ID,\n",
  "    urls=[\n",
  "        \"https://example.com\",\n",
  "    ],\n",

@@ -78,52 +87,11 @@
  "\n",
  "- `urls` Required. A list of URLs to fetch.\n",
  "- `text_content` Retrieve only text content. Default is `False`.\n",
-  "- `api_key` Optional. Browserbase API key. Default is `BROWSERBASE_API_KEY` env variable.\n",
-  "- `project_id` Optional. Browserbase Project ID. Default is `BROWSERBASE_PROJECT_ID` env variable.\n",
+  "- `api_key` Browserbase API key. Default is `BROWSERBASE_API_KEY` env variable.\n",
+  "- `project_id` Browserbase Project ID. Default is `BROWSERBASE_PROJECT_ID` env variable.\n",
  "- `session_id` Optional. Provide an existing Session ID.\n",
  "- `proxy` Optional. Enable/Disable Proxies."
 ]
},
-{
- "cell_type": "markdown",
- "metadata": {},
- "source": [
-  "## Loading images\n",
-  "\n",
-  "You can also load screenshots of webpages (as bytes) for multi-modal models.\n",
-  "\n",
-  "Full example using GPT-4V:"
- ]
-},
-{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
-  "from browserbase import Browserbase\n",
-  "from browserbase.helpers.gpt4 import GPT4VImage, GPT4VImageDetail\n",
-  "from langchain_core.messages import HumanMessage\n",
-  "from langchain_openai import ChatOpenAI\n",
-  "\n",
-  "chat = ChatOpenAI(model=\"gpt-4-vision-preview\", max_tokens=256)\n",
-  "browser = Browserbase()\n",
-  "\n",
-  "screenshot = browser.screenshot(\"https://browserbase.com\")\n",
-  "\n",
-  "result = chat.invoke(\n",
-  "    [\n",
-  "        HumanMessage(\n",
-  "            content=[\n",
-  "                {\"type\": \"text\", \"text\": \"What color is the logo?\"},\n",
-  "                GPT4VImage(screenshot, GPT4VImageDetail.auto),\n",
-  "            ]\n",
-  "        )\n",
-  "    ]\n",
-  ")\n",
-  "\n",
-  "print(result.content)"
- ]
-}
],
"metadata": {
@@ -3112,8 +3112,8 @@
 "|------------|---------|\n",
 "| langchain_astradb.cache | [AstraDBCache](https://python.langchain.com/api_reference/astradb/cache/langchain_astradb.cache.AstraDBCache.html) |\n",
 "| langchain_astradb.cache | [AstraDBSemanticCache](https://python.langchain.com/api_reference/astradb/cache/langchain_astradb.cache.AstraDBSemanticCache.html) |\n",
-"| langchain_community.cache | [AstraDBCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AstraDBCache.html) |\n",
-"| langchain_community.cache | [AstraDBSemanticCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AstraDBSemanticCache.html) |\n",
+"| langchain_community.cache | [AstraDBCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AstraDBCache.html) (deprecated since `langchain-community==0.0.28`) |\n",
+"| langchain_community.cache | [AstraDBSemanticCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AstraDBSemanticCache.html) (deprecated since `langchain-community==0.0.28`) |\n",
 "| langchain_community.cache | [AzureCosmosDBSemanticCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AzureCosmosDBSemanticCache.html) |\n",
 "| langchain_community.cache | [CassandraCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.CassandraCache.html) |\n",
 "| langchain_community.cache | [CassandraSemanticCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.CassandraSemanticCache.html) |\n",
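For reference, a minimal sketch of switching to the non-deprecated class, assuming an existing Astra DB endpoint and token (the credential values are placeholders):

```python
from langchain_astradb import AstraDBCache
from langchain_core.globals import set_llm_cache

# Replaces the deprecated langchain_community.cache.AstraDBCache.
set_llm_cache(
    AstraDBCache(
        api_endpoint="https://01234567-....apps.astra.datastax.com",  # placeholder
        token="AstraCS:...",  # placeholder
    )
)
```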
@@ -17,22 +17,22 @@
 "id": "f507f58b-bf22-4a48-8daf-68d869bcd1ba",
 "metadata": {},
 "source": [
-  "## Setting up\n",
+  "## Setup\n",
  "\n",
  "To run this notebook you need a running Astra DB. Get the connection secrets on your Astra dashboard:\n",
  "\n",
  "- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`;\n",
-  "- the Token looks like `AstraCS:6gBhNmsk135...`."
+  "- the Database Token looks like `AstraCS:aBcD0123...`."
 ]
},
{
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
 "id": "d7092199",
 "metadata": {},
 "outputs": [],
 "source": [
-  "%pip install --upgrade --quiet \"astrapy>=0.7.1 langchain-community\" "
+  "!pip install \"langchain-astradb>=0.6,<0.7\""
 ]
},
{
@@ -45,12 +45,12 @@
 },
 {
  "cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
  "id": "163d97f0",
  "metadata": {},
  "outputs": [
   {
-   "name": "stdout",
+   "name": "stdin",
    "output_type": "stream",
    "text": [
     "ASTRA_DB_API_ENDPOINT =  https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",

@@ -65,14 +65,6 @@
   "ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
  ]
 },
-{
- "cell_type": "markdown",
- "id": "55860b2d",
- "metadata": {},
- "source": [
-  "Depending on whether local or cloud-based Astra DB, create the corresponding database connection \"Session\" object."
- ]
-},
 {
  "cell_type": "markdown",
  "id": "36c163e8",

@@ -83,12 +75,12 @@
 },
 {
  "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
  "id": "d15e3302",
  "metadata": {},
  "outputs": [],
  "source": [
-  "from langchain_community.chat_message_histories import AstraDBChatMessageHistory\n",
+  "from langchain_astradb import AstraDBChatMessageHistory\n",
   "\n",
   "message_history = AstraDBChatMessageHistory(\n",
   "    session_id=\"test-session\",\n",

@@ -98,22 +90,31 @@
   "\n",
   "message_history.add_user_message(\"hi!\")\n",
   "\n",
-  "message_history.add_ai_message(\"whats up?\")"
+  "message_history.add_ai_message(\"hello, how are you?\")"
  ]
 },
+{
+ "cell_type": "markdown",
+ "id": "53acb4a8-d536-4a58-9fee-7d70033d9c81",
+ "metadata": {},
+ "source": [
+  "[**API Reference:** `AstraDBChatMessageHistory`](https://python.langchain.com/api_reference/astradb/chat_message_histories/langchain_astradb.chat_message_histories.AstraDBChatMessageHistory.html#langchain_astradb.chat_message_histories.AstraDBChatMessageHistory)"
+ ]
+},
 {
  "cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
  "id": "64fc465e",
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-     "[HumanMessage(content='hi!'), AIMessage(content='whats up?')]"
+     "[HumanMessage(content='hi!', additional_kwargs={}, response_metadata={}),\n",
+     " AIMessage(content='hello, how are you?', additional_kwargs={}, response_metadata={})]"
     ]
   },
-  "execution_count": 3,
+  "execution_count": 4,
   "metadata": {},
   "output_type": "execute_result"
  }

@@ -139,7 +140,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
-  "version": "3.10.12"
+  "version": "3.12.8"
 }
},
"nbformat": 4,
(One file's diff is suppressed because it is too large.)
@@ -7,10 +7,10 @@

 ## Installation and Setup

-We need to install the `hdbcli` python package.
+We need to install the `langchain-hana` python package.

 ```bash
-pip install hdbcli
+pip install langchain-hana
 ```

 ## Vectorstore

@@ -21,5 +21,5 @@
 See a [usage example](/docs/integrations/vectorstores/sap_hanavector).

 ```python
-from langchain_community.vectorstores.hanavector import HanaDB
+from langchain_hana import HanaDB
 ```
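A minimal usage sketch to go with the new import, assuming an SAP HANA Cloud instance reachable via `hdbcli` and OpenAI embeddings (the connection values are placeholders):

```python
from hdbcli import dbapi
from langchain_hana import HanaDB
from langchain_openai import OpenAIEmbeddings

# Placeholder connection parameters for your SAP HANA Cloud instance.
connection = dbapi.connect(
    address="<hostname>",
    port=443,
    user="<username>",
    password="<password>",
    autocommit=True,
)

# Vector store backed by a HANA table; embeddings computed client-side.
db = HanaDB(
    embedding=OpenAIEmbeddings(),
    connection=connection,
    table_name="LANGCHAIN_DEMO",
)
```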
@@ -4,7 +4,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-  "# Astra DB (Cassandra)\n",
+  "# Astra DB\n",
  "\n",
  ">[DataStax Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on `Cassandra` and made conveniently available through an easy-to-use JSON API.\n",
  "\n",
@@ -16,32 +16,46 @@
 "metadata": {},
 "source": [
  "## Creating an Astra DB vector store\n",
-  "First we'll want to create an Astra DB VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
+  "First, create an Astra DB vector store and seed it with some data.\n",
  "\n",
-  "NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`). We also need the `astrapy` package."
+  "We've created a small demo set of documents containing movie summaries.\n",
+  "\n",
+  "NOTE: The self-query retriever requires the `lark` package installed (`pip install lark`)."
 ]
},
{
 "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
+ "execution_count": null,
+ "metadata": {
+  "scrolled": true
+ },
 "outputs": [],
 "source": [
-  "%pip install --upgrade --quiet lark astrapy langchain-openai"
+  "!pip install \"langchain-astradb>=0.6,<0.7\" \\\n",
+  "    \"langchain_openai>=0.3,<0.4\" \\\n",
+  "    \"lark>=1.2,<2.0\""
 ]
},
{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-  "We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
+  "In this example, you'll use the `OpenAIEmbeddings`. Please enter an OpenAI API Key."
 ]
},
{
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
 "metadata": {},
- "outputs": [],
+ "outputs": [
+  {
+   "name": "stdin",
+   "output_type": "stream",
+   "text": [
+    "OpenAI API Key:  ········\n"
+   ]
+  }
+ ],
 "source": [
  "import os\n",
  "from getpass import getpass\n",

@@ -69,14 +83,23 @@
  "Create the Astra DB VectorStore:\n",
  "\n",
  "- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
-  "- the Token looks like `AstraCS:6gBhNmsk135....`"
+  "- the Token looks like `AstraCS:aBcD0123...`"
 ]
},
{
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
 "metadata": {},
- "outputs": [],
+ "outputs": [
+  {
+   "name": "stdin",
+   "output_type": "stream",
+   "text": [
+    "ASTRA_DB_API_ENDPOINT =  https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",
+    "ASTRA_DB_APPLICATION_TOKEN =  ········\n"
+   ]
+  }
+ ],
 "source": [
  "ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
  "ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"

@@ -84,11 +107,11 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
  "metadata": {},
  "outputs": [],
  "source": [
-  "from langchain_community.vectorstores import AstraDB\n",
+  "from langchain_astradb import AstraDBVectorStore\n",
   "from langchain_core.documents import Document\n",
   "\n",
   "docs = [\n",
@@ -101,11 +124,13 @@
   "        metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
   "    ),\n",
   "    Document(\n",
-  "        page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
+  "        page_content=\"A psychologist / detective gets lost in a series of dreams within dreams \"\n",
+  "        \"within dreams and Inception reused the idea\",\n",
   "        metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
   "    ),\n",
   "    Document(\n",
-  "        page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
+  "        page_content=\"A bunch of normal-sized women are supremely wholesome and some men \"\n",
+  "        \"pine after them\",\n",
   "        metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
   "    ),\n",
   "    Document(\n",

@@ -123,7 +148,7 @@
   "    ),\n",
   "]\n",
   "\n",
-  "vectorstore = AstraDB.from_documents(\n",
+  "vectorstore = AstraDBVectorStore.from_documents(\n",
   "    docs,\n",
   "    embeddings,\n",
   "    collection_name=\"astra_self_query_demo\",\n",
@@ -136,13 +161,16 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-  "## Creating our self-querying retriever\n",
-  "Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
+  "## Creating a self-querying retriever\n",
+  "\n",
+  "Now you can instantiate the retriever.\n",
+  "\n",
+  "To do this, you need to provide some information upfront about the metadata fields that the documents support, along with a short description of the documents' contents."
 ]
},
{
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -174,7 +202,11 @@
"llm = OpenAI(temperature=0)\n",
"\n",
"retriever = SelfQueryRetriever.from_llm(\n",
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
" llm,\n",
" vectorstore,\n",
" document_content_description,\n",
" metadata_field_info,\n",
" verbose=True,\n",
")"
]
},
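The hunk above elides the `metadata_field_info` and `document_content_description` values that the preceding markdown cell says you must provide. As a hedged sketch of what they typically look like for the movie documents in this demo (field names mirror the document metadata above; the exact import path and values are assumptions, not taken from this diff):

```python
from langchain.chains.query_constructor.base import AttributeInfo

# Illustrative schema describing the metadata fields of the demo documents.
metadata_field_info = [
    AttributeInfo(name="genre", description="The genre of the movie", type="string"),
    AttributeInfo(name="year", description="The year the movie was released", type="integer"),
    AttributeInfo(name="director", description="The name of the movie director", type="string"),
    AttributeInfo(name="rating", description="A 1-10 rating for the movie", type="float"),
]
document_content_description = "Brief summary of a movie"
```

These are the two objects passed to `SelfQueryRetriever.from_llm` in the cell above.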
@@ -183,14 +215,29 @@
"metadata": {},
"source": [
"## Testing it out\n",
"And now we can try actually using our retriever!"
"\n",
"Now you can try actually using the retriever:"
]
},
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='d7b9ec1edafa467caab524455e8c1f5d', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
|
||||
" Document(id='8ad04ef2a73d4f74897a51e49be1a8d2', metadata={'year': 1995, 'genre': 'animated'}, page_content='Toys come alive and have a blast doing so'),\n",
|
||||
" Document(id='5b07e600d3494506952b60e0a45a0546', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'rating': 9.9}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
|
||||
" Document(id='a0cef19e27c341929098ac4793602829', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.invoke(\"What are some movies about dinosaurs?\")"
|
||||
@ -198,9 +245,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='5b07e600d3494506952b60e0a45a0546', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'rating': 9.9}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
|
||||
" Document(id='a0cef19e27c341929098ac4793602829', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a filter\n",
|
||||
"retriever.invoke(\"I want to watch a movie rated higher than 8.5\")"
|
||||
@ -208,9 +267,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='0539843fd203484c9be486c2a0e2454c', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a query and a filter\n",
|
||||
"retriever.invoke(\"Has Greta Gerwig directed any movies about women\")"
|
||||
@ -218,9 +288,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='a0cef19e27c341929098ac4793602829', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),\n",
|
||||
" Document(id='5b07e600d3494506952b60e0a45a0546', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'rating': 9.9}, page_content='Three men walk into the Zone, three men walk out of the Zone')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a composite filter\n",
|
||||
"retriever.invoke(\"What's a highly rated (above 8.5), science fiction movie ?\")"
|
||||
@ -228,9 +310,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='8ad04ef2a73d4f74897a51e49be1a8d2', metadata={'year': 1995, 'genre': 'animated'}, page_content='Toys come alive and have a blast doing so')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and composite filter\n",
|
||||
"retriever.invoke(\n",
|
||||
@@ -242,20 +335,20 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filter k\n",
"## Set a limit ('k')\n",
"\n",
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
"You can also use the self-query retriever to specify `k`, the number of documents to fetch.\n",
"\n",
"We can do this by passing `enable_limit=True` to the constructor."
"You achieve this by passing `enable_limit=True` to the constructor."
]
},
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
"retriever_k = SelfQueryRetriever.from_llm(\n",
|
||||
" llm,\n",
|
||||
" vectorstore,\n",
|
||||
" document_content_description,\n",
|
||||
@ -267,12 +360,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='d7b9ec1edafa467caab524455e8c1f5d', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
|
||||
" Document(id='8ad04ef2a73d4f74897a51e49be1a8d2', metadata={'year': 1995, 'genre': 'animated'}, page_content='Toys come alive and have a blast doing so')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.invoke(\"What are two movies about dinosaurs?\")"
|
||||
"retriever_k.invoke(\"What are two movies about dinosaurs?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -293,7 +398,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
@ -322,7 +427,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.12.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@@ -1,13 +1,76 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8543d632",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Google Gemini\n",
"keywords: [google gemini embeddings]\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "afab8b36-10bb-4795-bc98-75ab2d2081bb",
"metadata": {},
"source": [
"# Google Generative AI Embeddings\n",
"# Google Generative AI Embeddings (AI Studio & Gemini API)\n",
"\n",
"Connect to Google's generative AI embeddings service using the `GoogleGenerativeAIEmbeddings` class, found in the [langchain-google-genai](https://pypi.org/project/langchain-google-genai/) package."
"Connect to Google's generative AI embeddings service using the `GoogleGenerativeAIEmbeddings` class, found in the [langchain-google-genai](https://pypi.org/project/langchain-google-genai/) package.\n",
"\n",
"This will help you get started with Google's Generative AI embedding models (like Gemini) using LangChain. For detailed documentation on `GoogleGenerativeAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/v0.2/api_reference/google_genai/embeddings/langchain_google_genai.embeddings.GoogleGenerativeAIEmbeddings.html).\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"import { ItemTable } from \"@theme/FeatureTables\";\n",
"\n",
"<ItemTable category=\"text_embedding\" item=\"Google Gemini\" />\n",
"\n",
"## Setup\n",
"\n",
"To access Google Generative AI embedding models you'll need to create a Google Cloud project, enable the Generative Language API, get an API key, and install the `langchain-google-genai` integration package.\n",
"\n",
"### Credentials\n",
"\n",
"To use Google Generative AI models, you must have an API key. You can create one in Google AI Studio. See the [Google documentation](https://ai.google.dev/gemini-api/docs/api-key) for instructions.\n",
"\n",
"Once you have a key, set it as an environment variable `GOOGLE_API_KEY`:\n"
]
},
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "47652620",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"GOOGLE_API_KEY\"):\n",
|
||||
" os.environ[\"GOOGLE_API_KEY\"] = getpass.getpass(\"Enter your Google API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "67283790",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eccf1968",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -28,28 +91,6 @@
|
||||
"%pip install --upgrade --quiet langchain-google-genai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "25f3f88e-164e-400d-b371-9fa488baba19",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Credentials"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ec89153f-8999-4aab-a21b-0bfba1cc3893",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if \"GOOGLE_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"GOOGLE_API_KEY\"] = getpass.getpass(\"Provide your Google API key here\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f2437b22-e364-418a-8c13-490a026cb7b5",
|
||||
@ -60,17 +101,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 20,
|
||||
"id": "eedc551e-a1f3-4fd8-8d65-4e0784c4441b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[0.05636945, 0.0048285457, -0.0762591, -0.023642512, 0.05329321]"
|
||||
"[-0.024917153641581535,\n",
|
||||
" 0.012005362659692764,\n",
|
||||
" -0.003886754624545574,\n",
|
||||
" -0.05774897709488869,\n",
|
||||
" 0.0020742062479257584]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -78,7 +123,7 @@
|
||||
"source": [
|
||||
"from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = GoogleGenerativeAIEmbeddings(model=\"models/text-embedding-004\")\n",
|
||||
"embeddings = GoogleGenerativeAIEmbeddings(model=\"models/gemini-embedding-exp-03-07\")\n",
|
||||
"vector = embeddings.embed_query(\"hello, world!\")\n",
|
||||
"vector[:5]"
|
||||
]
|
||||
@ -95,17 +140,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"id": "6ec53aba-404f-4778-acd9-5d6664e79ed2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(3, 768)"
|
||||
"(3, 3072)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -121,6 +166,56 @@
|
||||
"len(vectors), len(vectors[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c362bfbf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "606a7f65",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1482486f-5617-498a-8a44-1974d3212dda",
|
||||
@@ -129,70 +224,74 @@
"## Task type\n",
"`GoogleGenerativeAIEmbeddings` optionally supports a `task_type`, which currently must be one of:\n",
"\n",
"- task_type_unspecified\n",
"- retrieval_query\n",
"- retrieval_document\n",
"- semantic_similarity\n",
"- classification\n",
"- clustering\n",
"- `SEMANTIC_SIMILARITY`: Used to generate embeddings that are optimized to assess text similarity.\n",
"- `CLASSIFICATION`: Used to generate embeddings that are optimized to classify texts according to preset labels.\n",
"- `CLUSTERING`: Used to generate embeddings that are optimized to cluster texts based on their similarities.\n",
"- `RETRIEVAL_DOCUMENT`, `RETRIEVAL_QUERY`, `QUESTION_ANSWERING`, and `FACT_VERIFICATION`: Used to generate embeddings that are optimized for document search or information retrieval.\n",
"- `CODE_RETRIEVAL_QUERY`: Used to retrieve a code block based on a natural language query, such as sort an array or reverse a linked list. Embeddings of the code blocks are computed using `RETRIEVAL_DOCUMENT`.\n",
"\n",
"By default, we use `retrieval_document` in the `embed_documents` method and `retrieval_query` in the `embed_query` method. If you provide a task type, we will use that for all methods."
"By default, we use `RETRIEVAL_DOCUMENT` in the `embed_documents` method and `RETRIEVAL_QUERY` in the `embed_query` method. If you provide a task type, we will use that for all methods."
]
},
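To make the default-versus-explicit behavior described above concrete, here is a minimal sketch; the model ID mirrors the one used later in this notebook and should be treated as an assumption:

```python
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# No task_type given: embed_documents defaults to RETRIEVAL_DOCUMENT
# and embed_query defaults to RETRIEVAL_QUERY.
default_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-exp-03-07"  # assumed model ID
)

# Explicit task_type: all embedding methods now use CLUSTERING.
clustering_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-exp-03-07", task_type="CLUSTERING"
)
```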
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "a223bb25-2b1b-418e-a570-2f543083132e",
|
||||
"execution_count": null,
|
||||
"id": "b7acc5c2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet matplotlib scikit-learn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 19,
|
||||
"id": "f1f077db-8eb4-49f7-8866-471a8528dcdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Document 1\n",
|
||||
"Cosine similarity with query: 0.7892893360164779\n",
|
||||
"---\n",
|
||||
"Document 2\n",
|
||||
"Cosine similarity with query: 0.5438283285204146\n",
|
||||
"---\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
|
||||
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
||||
"\n",
|
||||
"query_embeddings = GoogleGenerativeAIEmbeddings(\n",
|
||||
" model=\"models/embedding-001\", task_type=\"retrieval_query\"\n",
|
||||
" model=\"models/gemini-embedding-exp-03-07\", task_type=\"RETRIEVAL_QUERY\"\n",
|
||||
")\n",
|
||||
"doc_embeddings = GoogleGenerativeAIEmbeddings(\n",
|
||||
" model=\"models/embedding-001\", task_type=\"retrieval_document\"\n",
|
||||
")"
|
||||
" model=\"models/gemini-embedding-exp-03-07\", task_type=\"RETRIEVAL_DOCUMENT\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"q_embed = query_embeddings.embed_query(\"What is the capital of France?\")\n",
|
||||
"d_embed = doc_embeddings.embed_documents(\n",
|
||||
" [\"The capital of France is Paris.\", \"Philipp is likes to eat pizza.\"]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for i, d in enumerate(d_embed):\n",
|
||||
" print(f\"Document {i+1}:\")\n",
|
||||
" print(f\"Cosine similarity with query: {cosine_similarity([q_embed], [d])[0][0]}\")\n",
|
||||
" print(\"---\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "79bd4a5e-75ba-413c-befa-86167c938caf",
|
||||
"id": "f45ea7b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"All of these will be embedded with the 'retrieval_query' task set\n",
|
||||
"```python\n",
|
||||
"query_vecs = [query_embeddings.embed_query(q) for q in [query, query_2, answer_1]]\n",
|
||||
"```\n",
|
||||
"All of these will be embedded with the 'retrieval_document' task set\n",
|
||||
"```python\n",
|
||||
"doc_vecs = [doc_embeddings.embed_query(q) for q in [query, query_2, answer_1]]\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9e1fae5e-0f84-4812-89f5-7d4d71affbc1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In retrieval, relative distance matters. In the image above, you can see the difference in similarity scores between the \"relevant doc\" and \"simil stronger delta between the similar query and relevant doc on the latter case."
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `GoogleGenerativeAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/google_genai/embeddings/langchain_google_genai.embeddings.GoogleGenerativeAIEmbeddings.html).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -211,7 +310,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -225,7 +324,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -26,7 +26,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "f7b3767b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -83,40 +83,39 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "851fee9f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"execution_count": null,
|
||||
"id": "7f056cc3-628d-46ba-b394-ee1d89f8650a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Download the README here and identify the link for LangChain tutorials: https://raw.githubusercontent.com/langchain-ai/langchain/master/README.md\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" terminal (call_mr86V0d6E9nQiJZT7Xw5fH0G)\n",
|
||||
" Call ID: call_mr86V0d6E9nQiJZT7Xw5fH0G\n",
|
||||
" Args:\n",
|
||||
" commands: ['curl -o README.md https://raw.githubusercontent.com/langchain-ai/langchain/master/README.md']\n",
|
||||
"Executing command:\n",
|
||||
" ['curl -o README.md https://raw.githubusercontent.com/langchain-ai/langchain/master/README.md']\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: terminal\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mQuestion: What is the task?\n",
|
||||
"Thought: We need to download the langchain.com webpage and extract all the URLs from it. Then we need to sort the URLs and return them.\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"shell\",\n",
|
||||
" \"action_input\": {\n",
|
||||
" \"commands\": [\n",
|
||||
" \"curl -s https://langchain.com | grep -o 'http[s]*://[^\\\" ]*' | sort\"\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m"
|
||||
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
||||
" Dload Upload Total Spent Left Speed\n",
|
||||
"100 5169 100 5169 0 0 114k 0 --:--:-- --:--:-- --:--:-- 114k\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/wfh/code/lc/lckg/langchain/tools/shell/tool.py:34: UserWarning: The shell tool has no safeguards by default. Use at your own risk.\n",
|
||||
"/langchain/libs/community/langchain_community/tools/shell/tool.py:33: UserWarning: The shell tool has no safeguards by default. Use at your own risk.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
@ -124,50 +123,58 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" terminal (call_LF8TGrgS84WvUvaazYnVfib8)\n",
|
||||
" Call ID: call_LF8TGrgS84WvUvaazYnVfib8\n",
|
||||
" Args:\n",
|
||||
" commands: [\"grep -i 'tutorial' README.md\"]\n",
|
||||
"Executing command:\n",
|
||||
" [\"grep -i 'tutorial' README.md\"]\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: terminal\n",
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mhttps://blog.langchain.dev/\n",
|
||||
"https://discord.gg/6adMQxSpJS\n",
|
||||
"https://docs.langchain.com/docs/\n",
|
||||
"https://github.com/hwchase17/chat-langchain\n",
|
||||
"https://github.com/hwchase17/langchain\n",
|
||||
"https://github.com/hwchase17/langchainjs\n",
|
||||
"https://github.com/sullivan-sean/chat-langchainjs\n",
|
||||
"https://js.langchain.com/docs/\n",
|
||||
"https://python.langchain.com/en/latest/\n",
|
||||
"https://twitter.com/langchainai\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe URLs have been successfully extracted and sorted. We can return the list of URLs as the final answer.\n",
|
||||
"Final Answer: [\"https://blog.langchain.dev/\", \"https://discord.gg/6adMQxSpJS\", \"https://docs.langchain.com/docs/\", \"https://github.com/hwchase17/chat-langchain\", \"https://github.com/hwchase17/langchain\", \"https://github.com/hwchase17/langchainjs\", \"https://github.com/sullivan-sean/chat-langchainjs\", \"https://js.langchain.com/docs/\", \"https://python.langchain.com/en/latest/\", \"https://twitter.com/langchainai\"]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"- [Tutorials](https://python.langchain.com/docs/tutorials/): Simple walkthroughs with\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'[\"https://blog.langchain.dev/\", \"https://discord.gg/6adMQxSpJS\", \"https://docs.langchain.com/docs/\", \"https://github.com/hwchase17/chat-langchain\", \"https://github.com/hwchase17/langchain\", \"https://github.com/hwchase17/langchainjs\", \"https://github.com/sullivan-sean/chat-langchainjs\", \"https://js.langchain.com/docs/\", \"https://python.langchain.com/en/latest/\", \"https://twitter.com/langchainai\"]'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/langchain/libs/community/langchain_community/tools/shell/tool.py:33: UserWarning: The shell tool has no safeguards by default. Use at your own risk.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The link for LangChain tutorials in the README is: https://python.langchain.com/docs/tutorials/\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"tools = [shell_tool]\n",
|
||||
"agent = create_react_agent(\"openai:gpt-4.1-mini\", tools)\n",
|
||||
"\n",
|
||||
"shell_tool.description = shell_tool.description + f\"args {shell_tool.args}\".replace(\n",
|
||||
" \"{\", \"{{\"\n",
|
||||
").replace(\"}\", \"}}\")\n",
|
||||
"self_ask_with_search = initialize_agent(\n",
|
||||
" [shell_tool], llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")\n",
|
||||
"self_ask_with_search.run(\n",
|
||||
" \"Download the langchain.com webpage and grep for all urls. Return only a sorted list of them. Be sure to use double quotes.\"\n",
|
||||
")"
|
||||
"input_message = {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": (\n",
|
||||
" \"Download the README here and identify the link for LangChain tutorials: \"\n",
|
||||
" \"https://raw.githubusercontent.com/langchain-ai/langchain/master/README.md\"\n",
|
||||
" ),\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": [input_message]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -195,7 +202,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@@ -6,18 +6,16 @@
"source": [
"# SAP HANA Cloud Vector Engine\n",
"\n",
">[SAP HANA Cloud Vector Engine](https://www.sap.com/events/teched/news-guide/ai.html#article8) is a vector store fully integrated into the `SAP HANA Cloud` database.\n",
"\n",
"You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration"
">[SAP HANA Cloud Vector Engine](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-vector-engine-guide/sap-hana-cloud-sap-hana-database-vector-engine-guide) is a vector store fully integrated into the `SAP HANA Cloud` database."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setting up\n",
"## Setup\n",
"\n",
"Installation of the HANA database driver."
"Install the `langchain-hana` external integration package, as well as the other packages used throughout this notebook."
]
},
{
|
||||
@ -26,53 +24,36 @@
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Pip install necessary package\n",
|
||||
"%pip install --upgrade --quiet hdbcli"
|
||||
"%pip install -qU langchain-hana"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For `OpenAIEmbeddings` we use the OpenAI API key from the environment."
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Ensure your SAP HANA instance is running. Load your credentials from environment variables and create a connection:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:16.802456Z",
|
||||
"start_time": "2023-09-09T08:02:07.065604Z"
|
||||
}
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"# Use OPENAI_API_KEY env variable\n",
|
||||
"# os.environ[\"OPENAI_API_KEY\"] = \"Your OpenAI API key\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a database connection to a HANA Cloud instance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:28.174088Z",
|
||||
"start_time": "2023-09-09T08:02:28.162698Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from hdbcli import dbapi\n",
|
||||
"\n",
|
||||
@ -88,6 +69,92 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Learn more about SAP HANA in [What is SAP HANA?](https://www.sap.com/products/data-cloud/hana/what-is-sap-hana.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialization\n",
|
||||
"To initialize a `HanaDB` vector store, you need a database connection and an embedding instance. SAP HANA Cloud Vector Engine supports both external and internal embeddings."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"- #### Using External Embeddings\n",
|
||||
"\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"- #### Using Internal Embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, you can compute embeddings directly in SAP HANA using its native `VECTOR_EMBEDDING()` function. To enable this, create an instance of `HanaInternalEmbeddings` with your internal model ID and pass it to `HanaDB`. Note that the `HanaInternalEmbeddings` instance is specifically designed for use with `HanaDB` and is not intended for use with other vector store implementations. For more information about internal embedding, see the [SAP HANA VECTOR_EMBEDDING Function](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-vector-engine-guide/vector-embedding-function-vector).\n",
|
||||
"\n",
|
||||
"> **Caution:** Ensure NLP is enabled in your SAP HANA Cloud instance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_hana import HanaInternalEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = HanaInternalEmbeddings(internal_embedding_model_id=\"SAP_NEB.20240715\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Once you have your connection and embedding instance, create the vector store by passing them to `HanaDB` along with a table name for storing vectors:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_hana import HanaDB\n",
|
||||
"\n",
|
||||
"db = HanaDB(\n",
|
||||
" embedding=embeddings, connection=connection, table_name=\"STATE_OF_THE_UNION\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -104,7 +171,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:25.452472Z",
|
||||
@ -122,40 +189,16 @@
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores.hanavector import HanaDB\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"text_documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n",
|
||||
"text_documents = TextLoader(\n",
|
||||
" \"../../how_to/state_of_the_union.txt\", encoding=\"UTF-8\"\n",
|
||||
").load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"text_chunks = text_splitter.split_documents(text_documents)\n",
|
||||
"print(f\"Number of document chunks: {len(text_chunks)}\")\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a LangChain VectorStore interface for the HANA database and specify the table (collection) to use for accessing the vector embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:04:16.696625Z",
|
||||
"start_time": "2023-09-09T08:02:31.817790Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = HanaDB(\n",
|
||||
" embedding=embeddings, connection=connection, table_name=\"STATE_OF_THE_UNION\"\n",
|
||||
")"
|
||||
"print(f\"Number of document chunks: {len(text_chunks)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -167,7 +210,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -176,7 +219,7 @@
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -199,7 +242,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -235,7 +278,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -254,7 +297,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.vectorstores.utils import DistanceStrategy\n",
|
||||
"from langchain_hana.utils import DistanceStrategy\n",
|
||||
"\n",
|
||||
"db = HanaDB(\n",
|
||||
" embedding=embeddings,\n",
|
||||
@ -286,7 +329,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:05:23.276819Z",
|
||||
@ -336,7 +379,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -411,7 +454,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -420,7 +463,7 @@
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -443,7 +486,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -452,7 +495,7 @@
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -471,7 +514,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -480,7 +523,7 @@
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -508,7 +551,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -539,7 +582,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -578,13 +621,14 @@
"| `$nin` | Not contained in a set of given values (not in) |\n",
"| `$between` | Between the range of two boundary values |\n",
"| `$like` | Text equality based on the \"LIKE\" semantics in SQL (using \"%\" as wildcard) |\n",
"| `$contains` | Filters documents containing a specific keyword |\n",
"| `$and` | Logical \"and\", supporting 2 or more operands |\n",
"| `$or` | Logical \"or\", supporting 2 or more operands |"
]
},
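The operators in this table can be combined; a small sketch using the `db` store created earlier (filter values are illustrative only):

```python
# Sketch: ids in [1, 3] AND names starting with "J" (LIKE semantics).
advanced_filter = {
    "$and": [
        {"id": {"$between": (1, 3)}},
        {"name": {"$like": "J%"}},
    ]
}
for doc in db.similarity_search("just testing", k=5, filter=advanced_filter):
    print(doc.metadata)
```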
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -592,15 +636,15 @@
|
||||
"docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"First\",\n",
|
||||
" metadata={\"name\": \"adam\", \"is_active\": True, \"id\": 1, \"height\": 10.0},\n",
|
||||
" metadata={\"name\": \"Adam Smith\", \"is_active\": True, \"id\": 1, \"height\": 10.0},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Second\",\n",
|
||||
" metadata={\"name\": \"bob\", \"is_active\": False, \"id\": 2, \"height\": 5.7},\n",
|
||||
" metadata={\"name\": \"Bob Johnson\", \"is_active\": False, \"id\": 2, \"height\": 5.7},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Third\",\n",
|
||||
" metadata={\"name\": \"jane\", \"is_active\": True, \"id\": 3, \"height\": 2.4},\n",
|
||||
" metadata={\"name\": \"Jane Doe\", \"is_active\": True, \"id\": 3, \"height\": 2.4},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
@ -632,7 +676,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -640,19 +684,19 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'id': {'$ne': 1}}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'id': {'$gt': 1}}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'id': {'$gte': 1}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'id': {'$lt': 1}}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'id': {'$lte': 1}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n"
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -687,7 +731,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -695,13 +739,13 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'id': {'$between': (1, 2)}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$in': ['adam', 'bob']}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$nin': ['adam', 'bob']}}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$in': ['Adam Smith', 'Bob Johnson']}}\n",
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$nin': ['Adam Smith', 'Bob Johnson']}}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -710,11 +754,11 @@
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$in\": [\"adam\", \"bob\"]}}\n",
|
||||
"advanced_filter = {\"name\": {\"$in\": [\"Adam Smith\", \"Bob Johnson\"]}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$nin\": [\"adam\", \"bob\"]}}\n",
|
||||
"advanced_filter = {\"name\": {\"$nin\": [\"Adam Smith\", \"Bob Johnson\"]}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))"
|
||||
]
|
||||
@ -728,7 +772,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -736,10 +780,10 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'name': {'$like': 'a%'}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'name': {'$like': '%a%'}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -753,6 +797,51 @@
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Text filtering with `$contains`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'name': {'$contains': 'bob'}}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$contains': 'bo'}}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'name': {'$contains': 'Adam Johnson'}}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'name': {'$contains': 'Adam Smith'}}\n",
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"advanced_filter = {\"name\": {\"$contains\": \"bob\"}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$contains\": \"bo\"}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$contains\": \"Adam Johnson\"}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$contains\": \"Adam Smith\"}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -762,7 +851,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -770,14 +859,15 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'$or': [{'id': 1}, {'name': 'bob'}]}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"Filter: {'$and': [{'id': 1}, {'id': 2}]}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'$or': [{'id': 1}, {'id': 2}, {'id': 3}]}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'$and': [{'name': {'$contains': 'bob'}}, {'name': {'$contains': 'johnson'}}]}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -792,6 +882,12 @@
|
||||
"\n",
|
||||
"advanced_filter = {\"$or\": [{\"id\": 1}, {\"id\": 2}, {\"id\": 3}]}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\n",
|
||||
" \"$and\": [{\"name\": {\"$contains\": \"bob\"}}, {\"name\": {\"$contains\": \"johnson\"}}]\n",
|
||||
"}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))"
|
||||
]
|
||||
},
|
||||
@ -804,13 +900,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"# Access the vector DB with a new table\n",
|
||||
"db = HanaDB(\n",
|
||||
" connection=connection,\n",
|
||||
@ -837,7 +930,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -874,6 +967,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\")\n",
|
||||
"memory = ConversationBufferMemory(\n",
|
||||
@ -898,7 +993,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -907,7 +1002,7 @@
|
||||
"text": [
|
||||
"Answer from LLM:\n",
|
||||
"================\n",
|
||||
"The United States has set up joint patrols with Mexico and Guatemala to catch more human traffickers. This collaboration is part of the efforts to address immigration issues and secure the borders in the region.\n",
|
||||
"The United States has set up joint patrols with Mexico and Guatemala to catch more human traffickers at the border. This collaborative effort aims to improve border security and combat illegal activities such as human trafficking.\n",
|
||||
"================\n",
|
||||
"Number of used source document chunks: 5\n"
|
||||
]
|
||||
@ -954,7 +1049,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -963,12 +1058,12 @@
|
||||
"text": [
|
||||
"Answer from LLM:\n",
|
||||
"================\n",
|
||||
"Mexico and Guatemala are involved in joint patrols to catch human traffickers.\n"
|
||||
"Countries like Mexico and Guatemala are participating in joint patrols to catch human traffickers. The United States is also working with partners in South and Central America to host more refugees and secure their borders. Additionally, the U.S. is working with twenty-seven members of the European Union, as well as countries like France, Germany, Italy, the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and Switzerland.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"What about other countries?\"\n",
|
||||
"question = \"How many casualties were reported after that?\"\n",
|
||||
"\n",
|
||||
"result = qa_chain.invoke({\"question\": question})\n",
|
||||
"print(\"Answer from LLM:\")\n",
|
||||
@ -996,7 +1091,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1005,7 +1100,7 @@
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 35,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -1038,7 +1133,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1101,7 +1196,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1111,7 +1206,7 @@
|
||||
"None\n",
|
||||
"Some other text\n",
|
||||
"{\"start\": 400, \"end\": 450, \"doc_name\": \"other.txt\"}\n",
|
||||
"<memory at 0x7f5edcb18d00>\n"
|
||||
"<memory at 0x110f856c0>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1168,7 +1263,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1176,9 +1271,9 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Some other text\n",
|
||||
"Some more text\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Some more text\n"
|
||||
"Some other text\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1214,7 +1309,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1224,7 +1319,7 @@
|
||||
"Filters on this value are very performant\n",
|
||||
"Some other text\n",
|
||||
"{\"start\": 400, \"end\": 450, \"doc_name\": \"other.txt\", \"CUSTOMTEXT\": \"Filters on this value are very performant\"}\n",
|
||||
"<memory at 0x7f5edcb193c0>\n"
|
||||
"<memory at 0x110f859c0>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1291,7 +1386,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1299,9 +1394,9 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Some other text\n",
|
||||
"Some more text\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Some more text\n"
|
||||
"Some other text\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1330,9 +1425,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "lc3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "your_env_name"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@ -1344,7 +1439,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.14"
|
||||
"version": "3.10.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -89,7 +89,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "39f3ce3e",
"metadata": {},
"outputs": [],
@ -118,15 +118,13 @@
" language: str = Field(description=\"The language the text is written in\")\n",
"\n",
"\n",
"# LLM\n",
"llm = ChatOpenAI(temperature=0, model=\"gpt-4o-mini\").with_structured_output(\n",
" Classification\n",
")"
"# Structured LLM\n",
"structured_llm = llm.with_structured_output(Classification)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "5509b6a6",
"metadata": {},
"outputs": [
@ -144,7 +142,7 @@
"source": [
"inp = \"Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!\"\n",
"prompt = tagging_prompt.invoke({\"input\": inp})\n",
"response = llm.invoke(prompt)\n",
"response = structured_llm.invoke(prompt)\n",
"\n",
"response"
]
@ -159,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "9154474c",
"metadata": {},
"outputs": [
@ -177,7 +175,7 @@
"source": [
"inp = \"Estoy muy enojado con vos! Te voy a dar tu merecido!\"\n",
"prompt = tagging_prompt.invoke({\"input\": inp})\n",
"response = llm.invoke(prompt)\n",
"response = structured_llm.invoke(prompt)\n",
"\n",
"response.model_dump()"
]
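A minimal, self-contained sketch of the structured-output pattern these notebook hunks converge on. It is an illustration, not part of the diff: the sentiment field and the example input are assumptions (the tutorial's Classification model defines more fields than the `language` one visible above), and it assumes langchain-openai is installed with OPENAI_API_KEY set.

# Sketch only: field set abbreviated; model name taken from the hunk above.
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

class Classification(BaseModel):
    sentiment: str = Field(description="The sentiment of the text")  # assumed field
    language: str = Field(description="The language the text is written in")

llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
# The change above splits model construction from schema binding:
structured_llm = llm.with_structured_output(Classification)

response = structured_llm.invoke("Estoy increiblemente contento de haberte conocido!")
print(response.model_dump())  # e.g. {'sentiment': 'positive', 'language': 'Spanish'}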
@ -145,15 +145,12 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "a5e490f6-35ad-455e-8ae4-2bae021583ff",
"metadata": {},
"outputs": [],
"source": [
"from typing import Optional\n",
"\n",
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from pydantic import BaseModel, Field\n",
"\n",
"# Define a custom prompt to provide instructions and any additional context.\n",
"# 1) You can add examples into the prompt template to improve extraction quality\n",
@ -135,6 +135,13 @@ ${llmVarName} = AzureChatOpenAI(
apiKeyName: "AZURE_OPENAI_API_KEY",
packageName: "langchain[openai]",
},
{
value: "google_genai",
label: "Google Gemini",
model: "gemini-2.0-flash",
apiKeyName: "GOOGLE_API_KEY",
packageName: "langchain[google-genai]",
},
{
value: "google_vertexai",
label: "Google Vertex",

@ -366,6 +366,12 @@ const FEATURE_TABLES = {
package: "langchain-openai",
apiLink: "https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html"
},
{
name: "Google Gemini",
link: "google-generative-ai",
package: "langchain-google-genai",
apiLink: "https://python.langchain.com/api_reference/google_genai/embeddings/langchain_google_genai.embeddings.GoogleGenerativeAIEmbeddings.html"
},
{
name: "Together",
link: "together",
1
docs/static/js/google_analytics.js
vendored
@ -3,3 +3,4 @@ function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());

gtag('config', 'G-9B66JQQH2F');
gtag('config', 'G-47WX3HKKY2');
@ -1,2 +1,4 @@
httpx
grpcio
aiohttp<3.11
protobuf<3.21

@ -5,7 +5,7 @@ build-backend = "pdm.backend"
[project]
authors = [{ name = "Erick Friis", email = "erick@langchain.dev" }]
license = { text = "MIT" }
requires-python = "<4.0,>=3.9"
requires-python = ">=3.9"
dependencies = [
"typer[all]<1.0.0,>=0.9.0",
"gitpython<4,>=3",
@ -1,9 +1,9 @@
version = 1
requires-python = ">=3.9, <4.0"
revision = 1
requires-python = ">=3.9"
resolution-markers = [
"python_full_version >= '3.12.4'",
"python_full_version >= '3.12' and python_full_version < '3.12.4'",
"python_full_version < '3.12'",
"python_full_version < '3.12.4'",
]

[[package]]
@ -407,7 +407,7 @@ wheels = [

[[package]]
name = "langchain"
version = "0.3.21"
version = "0.3.24"
source = { editable = "../langchain" }
dependencies = [
{ name = "async-timeout", marker = "python_full_version < '3.11'" },
@ -438,6 +438,7 @@ requires-dist = [
{ name = "langchain-mistralai", marker = "extra == 'mistralai'" },
{ name = "langchain-ollama", marker = "extra == 'ollama'" },
{ name = "langchain-openai", marker = "extra == 'openai'", editable = "../partners/openai" },
{ name = "langchain-perplexity", marker = "extra == 'perplexity'" },
{ name = "langchain-text-splitters", editable = "../text-splitters" },
{ name = "langchain-together", marker = "extra == 'together'" },
{ name = "langchain-xai", marker = "extra == 'xai'" },
@ -447,6 +448,7 @@ requires-dist = [
{ name = "requests", specifier = ">=2,<3" },
{ name = "sqlalchemy", specifier = ">=1.4,<3" },
]
provides-extras = ["community", "anthropic", "openai", "azure-ai", "cohere", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "deepseek", "xai", "perplexity"]

[package.metadata.requires-dev]
codespell = [{ name = "codespell", specifier = ">=2.2.0,<3.0.0" }]
@ -473,7 +475,8 @@ test = [
{ name = "langchain-tests", editable = "../standard-tests" },
{ name = "langchain-text-splitters", editable = "../text-splitters" },
{ name = "lark", specifier = ">=1.1.5,<2.0.0" },
{ name = "numpy", specifier = ">=1.26.4,<3" },
{ name = "numpy", marker = "python_full_version < '3.13'", specifier = ">=1.26.4" },
{ name = "numpy", marker = "python_full_version >= '3.13'", specifier = ">=2.1.0" },
{ name = "packaging", specifier = ">=24.2" },
{ name = "pandas", specifier = ">=2.0.0,<3.0.0" },
{ name = "pytest", specifier = ">=8,<9" },
@ -502,9 +505,10 @@ test-integration = [
typing = [
{ name = "langchain-core", editable = "../core" },
{ name = "langchain-text-splitters", editable = "../text-splitters" },
{ name = "mypy", specifier = ">=1.10,<2.0" },
{ name = "mypy", specifier = ">=1.15,<2.0" },
{ name = "mypy-protobuf", specifier = ">=3.0.0,<4.0.0" },
{ name = "numpy", specifier = ">=1.26.4,<3" },
{ name = "numpy", marker = "python_full_version < '3.13'", specifier = ">=1.26.4" },
{ name = "numpy", marker = "python_full_version >= '3.13'", specifier = ">=2.1.0" },
{ name = "types-chardet", specifier = ">=5.0.4.6,<6.0.0.0" },
{ name = "types-pytz", specifier = ">=2023.3.0.0,<2024.0.0.0" },
{ name = "types-pyyaml", specifier = ">=6.0.12.2,<7.0.0.0" },
@ -571,7 +575,7 @@ typing = [{ name = "langchain", editable = "../langchain" }]

[[package]]
name = "langchain-core"
version = "0.3.48"
version = "0.3.56"
source = { editable = "../core" }
dependencies = [
{ name = "jsonpatch" },
@ -601,16 +605,18 @@ dev = [
{ name = "jupyter", specifier = ">=1.0.0,<2.0.0" },
{ name = "setuptools", specifier = ">=67.6.1,<68.0.0" },
]
lint = [{ name = "ruff", specifier = ">=0.9.2,<1.0.0" }]
lint = [{ name = "ruff", specifier = ">=0.11.2,<0.12.0" }]
test = [
{ name = "blockbuster", specifier = "~=1.5.18" },
{ name = "freezegun", specifier = ">=1.2.2,<2.0.0" },
{ name = "grandalf", specifier = ">=0.8,<1.0" },
{ name = "langchain-tests", directory = "../standard-tests" },
{ name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.24.0,<2.0.0" },
{ name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0,<3" },
{ name = "numpy", marker = "python_full_version < '3.13'", specifier = ">=1.26.4" },
{ name = "numpy", marker = "python_full_version >= '3.13'", specifier = ">=2.1.0" },
{ name = "pytest", specifier = ">=8,<9" },
{ name = "pytest-asyncio", specifier = ">=0.21.1,<1.0.0" },
{ name = "pytest-benchmark" },
{ name = "pytest-codspeed" },
{ name = "pytest-mock", specifier = ">=3.10.0,<4.0.0" },
{ name = "pytest-socket", specifier = ">=0.7.0,<1.0.0" },
{ name = "pytest-watcher", specifier = ">=0.3.4,<1.0.0" },
@ -621,15 +627,14 @@ test = [
test-integration = []
typing = [
{ name = "langchain-text-splitters", directory = "../text-splitters" },
{ name = "mypy", specifier = ">=1.10,<1.11" },
{ name = "types-jinja2", specifier = ">=2.11.9,<3.0.0" },
{ name = "mypy", specifier = ">=1.15,<1.16" },
{ name = "types-pyyaml", specifier = ">=6.0.12.2,<7.0.0.0" },
{ name = "types-requests", specifier = ">=2.28.11.5,<3.0.0.0" },
]

[[package]]
name = "langchain-text-splitters"
version = "0.3.7"
version = "0.3.8"
source = { editable = "../text-splitters" }
dependencies = [
{ name = "langchain-core" },
@ -666,7 +671,7 @@ test-integration = [
]
typing = [
{ name = "lxml-stubs", specifier = ">=0.5.1,<1.0.0" },
{ name = "mypy", specifier = ">=1.10,<2.0" },
{ name = "mypy", specifier = ">=1.15,<2.0" },
{ name = "tiktoken", specifier = ">=0.8.0,<1.0.0" },
{ name = "types-requests", specifier = ">=2.31.0.20240218,<3.0.0.0" },
]
@ -694,19 +699,20 @@ all = [

[[package]]
name = "langsmith"
version = "0.3.5"
version = "0.3.37"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "httpx" },
{ name = "orjson", marker = "platform_python_implementation != 'PyPy'" },
{ name = "packaging" },
{ name = "pydantic" },
{ name = "requests" },
{ name = "requests-toolbelt" },
{ name = "zstandard" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b7/2c/d8acbc61896f5fc210a3f3bc8ddf39db5213b23eaf83a755ba334be30212/langsmith-0.3.5.tar.gz", hash = "sha256:d891a205f70ab0b2c26311db6c52486ffc9fc1124238b999619445f6ae900725", size = 321847 }
sdist = { url = "https://files.pythonhosted.org/packages/7b/d0/98daffe57c57c2f44c5d363df5004d8e530b8c9b15751f451d273fd1d4c8/langsmith-0.3.37.tar.gz", hash = "sha256:d49d9a12d24d3984d5b3e2b5915b525b4a29a4706ea9cadde43c980fba43fab0", size = 344645 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e6/00/dbbb9df2c575217326021da731534f246dce4bb6e95b55432ff7191643ac/langsmith-0.3.5-py3-none-any.whl", hash = "sha256:29da924d2e3662dd56f96d179ebc06662b66dd0b2317362ccebe0de1b78750e7", size = 333276 },
{ url = "https://files.pythonhosted.org/packages/50/f2/5700dbeec7dca0aa57a6ed2f472fa3a323b46c85ab2bc446b2c7c8fb599e/langsmith-0.3.37-py3-none-any.whl", hash = "sha256:bdecca4eb48ba1799e821a33dbdca318ab202faa71a5bfa7d2358be6c3fd7eeb", size = 359308 },
]

[[package]]
@ -104,7 +104,7 @@ def convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
additional_kwargs["name"] = _dict["name"]
return ToolMessage(
content=_dict.get("content", ""),
tool_call_id=_dict.get("tool_call_id"), # type: ignore[arg-type]
tool_call_id=_dict.get("tool_call_id"),
additional_kwargs=additional_kwargs,
)
else:

@ -22,7 +22,7 @@ class AzureAiServicesToolkit(BaseToolkit):

tools: List[BaseTool] = [
AzureAiServicesDocumentIntelligenceTool(), # type: ignore[call-arg]
AzureAiServicesImageAnalysisTool(), # type: ignore[call-arg]
AzureAiServicesImageAnalysisTool(),
AzureAiServicesSpeechToTextTool(), # type: ignore[call-arg]
AzureAiServicesTextToSpeechTool(), # type: ignore[call-arg]
AzureAiServicesTextAnalyticsForHealthTool(), # type: ignore[call-arg]

@ -81,7 +81,7 @@ class FileManagementToolkit(BaseToolkit):
tools: List[BaseTool] = []
for tool in allowed_tools:
tool_cls = _FILE_TOOLS_MAP[tool]
tools.append(tool_cls(root_dir=self.root_dir)) # type: ignore[call-arg]
tools.append(tool_cls(root_dir=self.root_dir))
return tools

@ -13,7 +13,7 @@ from langchain_community.tools.openapi.utils.openapi_utils import OpenAPISpec
from langchain_community.utilities.requests import Requests

class NLATool(Tool): # type: ignore[override]
class NLATool(Tool):
"""Natural Language API Tool."""

@classmethod

@ -64,7 +64,7 @@ def _get_default_llm_chain_factory(
return partial(_get_default_llm_chain, prompt)

class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool): # type: ignore[override]
class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool):
"""Requests GET tool with LLM-instructed extraction of truncated responses."""

name: str = "requests_get"
@ -98,7 +98,7 @@ class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool): # type: ignore[ov
raise NotImplementedError()

class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool): # type: ignore[override]
class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool):
"""Requests POST tool with LLM-instructed extraction of truncated responses."""

name: str = "requests_post"
@ -129,7 +129,7 @@ class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool): # type: ignore[o
raise NotImplementedError()

class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool): # type: ignore[override]
class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool):
"""Requests PATCH tool with LLM-instructed extraction of truncated responses."""

name: str = "requests_patch"
@ -162,7 +162,7 @@ class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool): # type: ignore[
raise NotImplementedError()

class RequestsPutToolWithParsing(BaseRequestsTool, BaseTool): # type: ignore[override]
class RequestsPutToolWithParsing(BaseRequestsTool, BaseTool):
"""Requests PUT tool with LLM-instructed extraction of truncated responses."""

name: str = "requests_put"
@ -193,7 +193,7 @@ class RequestsPutToolWithParsing(BaseRequestsTool, BaseTool): # type: ignore[ov
raise NotImplementedError()

class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool): # type: ignore[override]
class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool):
"""Tool that sends a DELETE request and parses the response."""

name: str = "requests_delete"
@ -266,7 +266,7 @@ def _create_api_controller_agent(
if "GET" in allowed_operations:
get_llm_chain = LLMChain(llm=llm, prompt=PARSING_GET_PROMPT)
tools.append(
RequestsGetToolWithParsing( # type: ignore[call-arg]
RequestsGetToolWithParsing(
requests_wrapper=requests_wrapper,
llm_chain=get_llm_chain,
allow_dangerous_requests=allow_dangerous_requests,
@ -275,7 +275,7 @@ def _create_api_controller_agent(
if "POST" in allowed_operations:
post_llm_chain = LLMChain(llm=llm, prompt=PARSING_POST_PROMPT)
tools.append(
RequestsPostToolWithParsing( # type: ignore[call-arg]
RequestsPostToolWithParsing(
requests_wrapper=requests_wrapper,
llm_chain=post_llm_chain,
allow_dangerous_requests=allow_dangerous_requests,
@ -284,7 +284,7 @@ def _create_api_controller_agent(
if "PUT" in allowed_operations:
put_llm_chain = LLMChain(llm=llm, prompt=PARSING_PUT_PROMPT)
tools.append(
RequestsPutToolWithParsing( # type: ignore[call-arg]
RequestsPutToolWithParsing(
requests_wrapper=requests_wrapper,
llm_chain=put_llm_chain,
allow_dangerous_requests=allow_dangerous_requests,
@ -293,7 +293,7 @@ def _create_api_controller_agent(
if "DELETE" in allowed_operations:
delete_llm_chain = LLMChain(llm=llm, prompt=PARSING_DELETE_PROMPT)
tools.append(
RequestsDeleteToolWithParsing( # type: ignore[call-arg]
RequestsDeleteToolWithParsing(
requests_wrapper=requests_wrapper,
llm_chain=delete_llm_chain,
allow_dangerous_requests=allow_dangerous_requests,
@ -302,7 +302,7 @@ def _create_api_controller_agent(
if "PATCH" in allowed_operations:
patch_llm_chain = LLMChain(llm=llm, prompt=PARSING_PATCH_PROMPT)
tools.append(
RequestsPatchToolWithParsing( # type: ignore[call-arg]
RequestsPatchToolWithParsing(
requests_wrapper=requests_wrapper,
llm_chain=patch_llm_chain,
allow_dangerous_requests=allow_dangerous_requests,
|
||||
powerbi=self.powerbi,
|
||||
examples=self.examples,
|
||||
max_iterations=self.max_iterations,
|
||||
output_token_limit=self.output_token_limit, # type: ignore[arg-type]
|
||||
output_token_limit=self.output_token_limit,
|
||||
tiktoken_model_name=self.tiktoken_model_name,
|
||||
),
|
||||
InfoPowerBITool(powerbi=self.powerbi),
|
||||
|
@ -289,7 +289,7 @@ class OpenAIAssistantV2Runnable(OpenAIAssistantRunnable):
|
||||
name=name,
|
||||
instructions=instructions,
|
||||
tools=[_get_assistants_tool(tool) for tool in tools],
|
||||
tool_resources=tool_resources, # type: ignore[arg-type]
|
||||
tool_resources=tool_resources,
|
||||
model=model,
|
||||
extra_body=extra_body,
|
||||
**model_kwargs,
|
||||
@ -431,7 +431,7 @@ class OpenAIAssistantV2Runnable(OpenAIAssistantRunnable):
|
||||
name=name,
|
||||
instructions=instructions,
|
||||
tools=openai_tools,
|
||||
tool_resources=tool_resources, # type: ignore[arg-type]
|
||||
tool_resources=tool_resources,
|
||||
model=model,
|
||||
)
|
||||
return cls(assistant_id=assistant.id, async_client=async_client, **kwargs)
|
||||
|
@ -579,7 +579,7 @@ class AsyncRedisCache(_RedisCacheBase):
|
||||
try:
|
||||
async with self.redis.pipeline() as pipe:
|
||||
self._configure_pipeline_for_update(key, pipe, return_val, self.ttl)
|
||||
await pipe.execute() # type: ignore[attr-defined]
|
||||
await pipe.execute()
|
||||
except Exception as e:
|
||||
logger.error(f"Redis async update failed: {e}")
|
||||
|
||||
|
@ -378,7 +378,7 @@ def create_ernie_fn_chain(
|
||||
output_key: str = "function",
|
||||
output_parser: Optional[BaseLLMOutputParser] = None,
|
||||
**kwargs: Any,
|
||||
) -> LLMChain: # type: ignore[valid-type]
|
||||
) -> LLMChain:
|
||||
"""[Legacy] Create an LLM chain that uses Ernie functions.
|
||||
|
||||
Args:
|
||||
@ -455,7 +455,7 @@ def create_ernie_fn_chain(
|
||||
}
|
||||
if len(ernie_functions) == 1:
|
||||
llm_kwargs["function_call"] = {"name": ernie_functions[0]["name"]}
|
||||
llm_chain = LLMChain( # type: ignore[misc]
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
output_parser=output_parser,
|
||||
@ -474,7 +474,7 @@ def create_structured_output_chain(
|
||||
output_key: str = "function",
|
||||
output_parser: Optional[BaseLLMOutputParser] = None,
|
||||
**kwargs: Any,
|
||||
) -> LLMChain: # type: ignore[valid-type]
|
||||
) -> LLMChain:
|
||||
"""[Legacy] Create an LLMChain that uses an Ernie function to get a structured output.
|
||||
|
||||
Args:
|
||||
|
@ -324,7 +324,7 @@ class GraphCypherQAChain(Chain):
|
||||
|
||||
cypher_generation_chain = LLMChain(
|
||||
llm=cypher_llm or llm, # type: ignore[arg-type]
|
||||
**use_cypher_llm_kwargs, # type: ignore[arg-type]
|
||||
**use_cypher_llm_kwargs,
|
||||
)
|
||||
|
||||
if exclude_types and include_types:
|
||||
|
@ -235,7 +235,7 @@ class MemgraphQAChain(Chain):
|
||||
llm_to_use = cypher_llm if cypher_llm is not None else llm
|
||||
|
||||
if prompt is not None and llm_to_use is not None:
|
||||
cypher_generation_chain = prompt | llm_to_use | StrOutputParser() # type: ignore[arg-type]
|
||||
cypher_generation_chain = prompt | llm_to_use | StrOutputParser()
|
||||
else:
|
||||
raise ValueError(
|
||||
"Missing required components for the cypher generation chain: "
|
||||
|
@ -181,7 +181,7 @@ class NeptuneSparqlQAChain(Chain):
|
||||
)
|
||||
sparql_generation_chain = LLMChain(llm=llm, prompt=sparql_prompt)
|
||||
|
||||
return cls( # type: ignore[call-arg]
|
||||
return cls(
|
||||
qa_chain=qa_chain,
|
||||
sparql_generation_chain=sparql_generation_chain,
|
||||
examples=examples,
|
||||
|
@ -28,7 +28,7 @@ class LLMRequestsChain(Chain):
|
||||
See https://python.langchain.com/docs/security for more information.
|
||||
"""
|
||||
|
||||
llm_chain: LLMChain # type: ignore[valid-type]
|
||||
llm_chain: LLMChain
|
||||
requests_wrapper: TextRequestsWrapper = Field(
|
||||
default_factory=lambda: TextRequestsWrapper(headers=DEFAULT_HEADERS),
|
||||
exclude=True,
|
||||
@ -88,7 +88,7 @@ class LLMRequestsChain(Chain):
|
||||
# extract the text from the html
|
||||
soup = BeautifulSoup(res, "html.parser")
|
||||
other_keys[self.requests_key] = soup.get_text()[: self.text_length]
|
||||
result = self.llm_chain.predict( # type: ignore[attr-defined]
|
||||
result = self.llm_chain.predict(
|
||||
callbacks=_run_manager.get_child(), **other_keys
|
||||
)
|
||||
return {self.output_key: result}
|
||||
|
@ -158,7 +158,7 @@ class IMessageChatLoader(BaseChatLoader):
|
||||
continue
|
||||
|
||||
results.append(
|
||||
HumanMessage( # type: ignore[call-arg]
|
||||
HumanMessage(
|
||||
role=sender,
|
||||
content=content,
|
||||
additional_kwargs={
|
||||
|
@ -52,7 +52,7 @@ class SlackChatLoader(BaseChatLoader):
|
||||
)
|
||||
else:
|
||||
results.append(
|
||||
HumanMessage( # type: ignore[call-arg]
|
||||
HumanMessage(
|
||||
role=sender,
|
||||
content=text,
|
||||
additional_kwargs={
|
||||
|
@ -78,7 +78,7 @@ def map_ai_messages_in_session(chat_sessions: ChatSession, sender: str) -> ChatS
|
||||
message = AIMessage(
|
||||
content=message.content,
|
||||
additional_kwargs=message.additional_kwargs.copy(),
|
||||
example=getattr(message, "example", None), # type: ignore[arg-type]
|
||||
example=getattr(message, "example", None),
|
||||
)
|
||||
num_converted += 1
|
||||
messages.append(message)
|
||||
|
@ -73,7 +73,7 @@ class WhatsAppChatLoader(BaseChatLoader):
|
||||
timestamp, sender, text = result.groups()
|
||||
if not self._ignore_lines.match(text.strip()):
|
||||
results.append(
|
||||
HumanMessage( # type: ignore[call-arg]
|
||||
HumanMessage(
|
||||
role=sender,
|
||||
content=text,
|
||||
additional_kwargs={
|
||||
|
@ -85,7 +85,7 @@ def create_message_model(table_name: str, DynamicBase: Any) -> Any:
|
||||
"""
|
||||
|
||||
# Model declared inside a function to have a dynamic table name.
|
||||
class Message(DynamicBase): # type: ignore[valid-type, misc]
|
||||
class Message(DynamicBase):
|
||||
__tablename__ = table_name
|
||||
id = Column(Integer, primary_key=True)
|
||||
session_id = Column(Text)
|
||||
|
@ -167,7 +167,7 @@ class ChatAnyscale(ChatOpenAI):
|
||||
else:
|
||||
values["openai_api_base"] = values["anyscale_api_base"]
|
||||
values["openai_api_key"] = values["anyscale_api_key"].get_secret_value()
|
||||
values["client"] = openai.ChatCompletion # type: ignore[attr-defined]
|
||||
values["client"] = openai.ChatCompletion
|
||||
except AttributeError as exc:
|
||||
raise ValueError(
|
||||
"`openai` has no `ChatCompletion` attribute, this is likely "
|
||||
|
@ -227,7 +227,7 @@ class AzureChatOpenAI(ChatOpenAI):
|
||||
**client_params
|
||||
).chat.completions
|
||||
else:
|
||||
values["client"] = openai.ChatCompletion # type: ignore[attr-defined]
|
||||
values["client"] = openai.ChatCompletion
|
||||
return values
|
||||
|
||||
@property
|
||||
|
@ -304,7 +304,7 @@ class AzureMLChatOnlineEndpoint(BaseChatModel, AzureMLBaseEndpoint):
|
||||
"http_client": None,
|
||||
}
|
||||
|
||||
client = openai.OpenAI(**client_params) # type: ignore[arg-type, arg-type, arg-type, arg-type, arg-type, arg-type]
|
||||
client = openai.OpenAI(**client_params)
|
||||
message_dicts = [
|
||||
CustomOpenAIChatContentFormatter._convert_message_to_dict(m)
|
||||
for m in messages
|
||||
@ -312,30 +312,30 @@ class AzureMLChatOnlineEndpoint(BaseChatModel, AzureMLBaseEndpoint):
|
||||
params = {"stream": True, "stop": stop, "model": None, **kwargs}
|
||||
|
||||
default_chunk_class = AIMessageChunk
|
||||
for chunk in client.chat.completions.create(messages=message_dicts, **params): # type: ignore[arg-type]
|
||||
for chunk in client.chat.completions.create(messages=message_dicts, **params):
|
||||
if not isinstance(chunk, dict):
|
||||
chunk = chunk.dict() # type: ignore[attr-defined]
|
||||
if len(chunk["choices"]) == 0: # type: ignore[call-overload]
|
||||
chunk = chunk.dict()
|
||||
if len(chunk["choices"]) == 0:
|
||||
continue
|
||||
choice = chunk["choices"][0] # type: ignore[call-overload]
|
||||
chunk = _convert_delta_to_message_chunk( # type: ignore[assignment]
|
||||
choice["delta"], # type: ignore[arg-type, index]
|
||||
default_chunk_class, # type: ignore[arg-type, index]
|
||||
choice = chunk["choices"][0]
|
||||
chunk = _convert_delta_to_message_chunk(
|
||||
choice["delta"],
|
||||
default_chunk_class,
|
||||
)
|
||||
generation_info = {}
|
||||
if finish_reason := choice.get("finish_reason"): # type: ignore[union-attr]
|
||||
if finish_reason := choice.get("finish_reason"):
|
||||
generation_info["finish_reason"] = finish_reason
|
||||
logprobs = choice.get("logprobs") # type: ignore[union-attr]
|
||||
logprobs = choice.get("logprobs")
|
||||
if logprobs:
|
||||
generation_info["logprobs"] = logprobs
|
||||
default_chunk_class = chunk.__class__ # type: ignore[assignment]
|
||||
chunk = ChatGenerationChunk( # type: ignore[assignment]
|
||||
message=chunk, # type: ignore[arg-type]
|
||||
generation_info=generation_info or None, # type: ignore[arg-type]
|
||||
default_chunk_class = chunk.__class__
|
||||
chunk = ChatGenerationChunk(
|
||||
message=chunk,
|
||||
generation_info=generation_info or None,
|
||||
)
|
||||
if run_manager:
|
||||
run_manager.on_llm_new_token(chunk.text, chunk=chunk, logprobs=logprobs) # type: ignore[attr-defined, arg-type]
|
||||
yield chunk # type: ignore[misc]
|
||||
run_manager.on_llm_new_token(chunk.text, chunk=chunk, logprobs=logprobs)
|
||||
yield chunk
|
||||
|
||||
async def _astream(
|
||||
self,
|
||||
@ -359,7 +359,7 @@ class AzureMLChatOnlineEndpoint(BaseChatModel, AzureMLBaseEndpoint):
|
||||
"http_client": None,
|
||||
}
|
||||
|
||||
async_client = openai.AsyncOpenAI(**client_params) # type: ignore[arg-type, arg-type, arg-type, arg-type, arg-type, arg-type]
|
||||
async_client = openai.AsyncOpenAI(**client_params)
|
||||
message_dicts = [
|
||||
CustomOpenAIChatContentFormatter._convert_message_to_dict(m)
|
||||
for m in messages
|
||||
@ -367,9 +367,9 @@ class AzureMLChatOnlineEndpoint(BaseChatModel, AzureMLBaseEndpoint):
|
||||
params = {"stream": True, "stop": stop, "model": None, **kwargs}
|
||||
|
||||
default_chunk_class = AIMessageChunk
|
||||
async for chunk in await async_client.chat.completions.create( # type: ignore[attr-defined]
|
||||
messages=message_dicts, # type: ignore[arg-type]
|
||||
**params, # type: ignore[arg-type]
|
||||
async for chunk in await async_client.chat.completions.create(
|
||||
messages=message_dicts,
|
||||
**params,
|
||||
):
|
||||
if not isinstance(chunk, dict):
|
||||
chunk = chunk.dict()
|
||||
|
@ -128,7 +128,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
|
||||
return AIMessage(
|
||||
content=content,
|
||||
additional_kwargs=additional_kwargs,
|
||||
tool_calls=tool_calls, # type: ignore[arg-type]
|
||||
tool_calls=tool_calls,
|
||||
invalid_tool_calls=invalid_tool_calls,
|
||||
)
|
||||
elif role == "tool":
|
||||
@ -137,7 +137,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
|
||||
additional_kwargs["name"] = _dict["name"]
|
||||
return ToolMessage(
|
||||
content=content,
|
||||
tool_call_id=_dict.get("tool_call_id"), # type: ignore[arg-type]
|
||||
tool_call_id=_dict.get("tool_call_id"),
|
||||
additional_kwargs=additional_kwargs,
|
||||
)
|
||||
elif role == "system":
|
||||
|
@ -821,7 +821,7 @@ class QianfanChatEndpoint(BaseChatModel):
|
||||
if is_pydantic_schema:
|
||||
output_parser: OutputParserLike = PydanticToolsParser(
|
||||
tools=[schema], # type: ignore[list-item]
|
||||
first_tool_only=True, # type: ignore[list-item]
|
||||
first_tool_only=True,
|
||||
)
|
||||
else:
|
||||
key_name = convert_to_openai_tool(schema)["function"]["name"]
|
||||
|
@ -213,7 +213,7 @@ class ChatCloudflareWorkersAI(BaseChatModel):
|
||||
if is_pydantic_schema:
|
||||
output_parser: OutputParserLike = PydanticToolsParser(
|
||||
tools=[schema], # type: ignore[list-item]
|
||||
first_tool_only=True, # type: ignore[list-item]
|
||||
first_tool_only=True,
|
||||
)
|
||||
else:
|
||||
output_parser = JsonOutputKeyToolsParser(
|
||||
@ -222,7 +222,7 @@ class ChatCloudflareWorkersAI(BaseChatModel):
|
||||
elif method == "json_mode":
|
||||
llm = self.bind(response_format={"type": "json_object"})
|
||||
output_parser = (
|
||||
PydanticOutputParser(pydantic_object=schema) # type: ignore[type-var, arg-type]
|
||||
PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type]
|
||||
if is_pydantic_schema
|
||||
else JsonOutputParser()
|
||||
)
|
||||
|
@ -110,7 +110,7 @@ class ChatEverlyAI(ChatOpenAI):
|
||||
"Please install it with `pip install openai`.",
|
||||
) from e
|
||||
try:
|
||||
values["client"] = openai.ChatCompletion # type: ignore[attr-defined]
|
||||
values["client"] = openai.ChatCompletion
|
||||
except AttributeError as exc:
|
||||
raise ValueError(
|
||||
"`openai` has no `ChatCompletion` attribute, this is likely "
|
||||
|
@ -70,11 +70,11 @@ def _create_retry_decorator(llm: JinaChat) -> Callable[[Any], Any]:
|
||||
stop=stop_after_attempt(llm.max_retries),
|
||||
wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds),
|
||||
retry=(
|
||||
retry_if_exception_type(openai.error.Timeout) # type: ignore[attr-defined]
|
||||
| retry_if_exception_type(openai.error.APIError) # type: ignore[attr-defined]
|
||||
| retry_if_exception_type(openai.error.APIConnectionError) # type: ignore[attr-defined]
|
||||
| retry_if_exception_type(openai.error.RateLimitError) # type: ignore[attr-defined]
|
||||
| retry_if_exception_type(openai.error.ServiceUnavailableError) # type: ignore[attr-defined]
|
||||
retry_if_exception_type(openai.error.Timeout)
|
||||
| retry_if_exception_type(openai.error.APIError)
|
||||
| retry_if_exception_type(openai.error.APIConnectionError)
|
||||
| retry_if_exception_type(openai.error.RateLimitError)
|
||||
| retry_if_exception_type(openai.error.ServiceUnavailableError)
|
||||
),
|
||||
before_sleep=before_sleep_log(logger, logging.WARNING),
|
||||
)
|
||||
@ -234,7 +234,7 @@ class JinaChat(BaseChatModel):
|
||||
"Please install it with `pip install openai`."
|
||||
)
|
||||
try:
|
||||
values["client"] = openai.ChatCompletion # type: ignore[attr-defined]
|
||||
values["client"] = openai.ChatCompletion
|
||||
except AttributeError:
|
||||
raise ValueError(
|
||||
"`openai` has no `ChatCompletion` attribute, this is likely "
|
||||
@ -266,11 +266,11 @@ class JinaChat(BaseChatModel):
|
||||
stop=stop_after_attempt(self.max_retries),
|
||||
wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds),
|
||||
retry=(
|
||||
retry_if_exception_type(openai.error.Timeout) # type: ignore[attr-defined]
|
||||
| retry_if_exception_type(openai.error.APIError) # type: ignore[attr-defined]
|
||||
| retry_if_exception_type(openai.error.APIConnectionError) # type: ignore[attr-defined]
|
||||
| retry_if_exception_type(openai.error.RateLimitError) # type: ignore[attr-defined]
|
||||
| retry_if_exception_type(openai.error.ServiceUnavailableError) # type: ignore[attr-defined]
|
||||
retry_if_exception_type(openai.error.Timeout)
|
||||
| retry_if_exception_type(openai.error.APIError)
|
||||
| retry_if_exception_type(openai.error.APIConnectionError)
|
||||
| retry_if_exception_type(openai.error.RateLimitError)
|
||||
| retry_if_exception_type(openai.error.ServiceUnavailableError)
|
||||
),
|
||||
before_sleep=before_sleep_log(logger, logging.WARNING),
|
||||
)
|
||||
|
@ -42,7 +42,7 @@ DEFAULT_MODEL = "meta-llama/Llama-2-13b-chat-hf"
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ChatKonko(ChatOpenAI): # type: ignore[override]
|
||||
class ChatKonko(ChatOpenAI):
|
||||
"""`ChatKonko` Chat large language models API.
|
||||
|
||||
To use, you should have the ``konko`` python package installed, and the
|
||||
|
@ -664,7 +664,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
|
||||
additional_kwargs=additional_kwargs,
|
||||
name=name,
|
||||
id=id_,
|
||||
tool_calls=tool_calls, # type: ignore[arg-type]
|
||||
tool_calls=tool_calls,
|
||||
invalid_tool_calls=invalid_tool_calls,
|
||||
)
|
||||
elif role == "system":
|
||||
|
@ -777,7 +777,7 @@ class MiniMaxChat(BaseChatModel):
|
||||
if is_pydantic_schema:
|
||||
output_parser: OutputParserLike = PydanticToolsParser(
|
||||
tools=[schema], # type: ignore[list-item]
|
||||
first_tool_only=True, # type: ignore[list-item]
|
||||
first_tool_only=True,
|
||||
)
|
||||
else:
|
||||
key_name = convert_to_openai_tool(schema)["function"]["name"]
|
||||
|
@ -12,7 +12,7 @@ from langchain_community.chat_models import ChatOpenAI
|
||||
from langchain_community.llms.moonshot import MOONSHOT_SERVICE_URL_BASE, MoonshotCommon
|
||||
|
||||
|
||||
class MoonshotChat(MoonshotCommon, ChatOpenAI): # type: ignore[misc, override, override]
|
||||
class MoonshotChat(MoonshotCommon, ChatOpenAI): # type: ignore[misc]
|
||||
"""Moonshot chat model integration.
|
||||
|
||||
Setup:
|
||||
|
@ -587,7 +587,7 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment):
|
||||
if method == "json_mode":
|
||||
llm = self.bind(response_format={"type": "json_object"})
|
||||
output_parser = (
|
||||
PydanticOutputParser(pydantic_object=schema) # type: ignore[type-var, arg-type]
|
||||
PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type]
|
||||
if is_pydantic_schema
|
||||
else JsonOutputParser()
|
||||
)
|
||||
|
@ -725,7 +725,7 @@ class ChatOCIGenAI(BaseChatModel, OCIGenAIBase):
|
||||
elif method == "json_mode":
|
||||
llm = self.bind(response_format={"type": "json_object"})
|
||||
output_parser = (
|
||||
PydanticOutputParser(pydantic_object=schema) # type: ignore[type-var, arg-type]
|
||||
PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type]
|
||||
if is_pydantic_schema
|
||||
else JsonOutputParser()
|
||||
)
|
||||
|
@ -98,7 +98,7 @@ class ChatOctoAI(ChatOpenAI):
|
||||
else:
|
||||
values["openai_api_base"] = values["octoai_api_base"]
|
||||
values["openai_api_key"] = values["octoai_api_token"].get_secret_value()
|
||||
values["client"] = openai.ChatCompletion # type: ignore[attr-defined]
|
||||
values["client"] = openai.ChatCompletion
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import openai python package. "
|
||||
|
@ -88,11 +88,11 @@ def _create_retry_decorator(
|
||||
import openai
|
||||
|
||||
errors = [
|
||||
openai.error.Timeout, # type: ignore[attr-defined]
|
||||
openai.error.APIError, # type: ignore[attr-defined]
|
||||
openai.error.APIConnectionError, # type: ignore[attr-defined]
|
||||
openai.error.RateLimitError, # type: ignore[attr-defined]
|
||||
openai.error.ServiceUnavailableError, # type: ignore[attr-defined]
|
||||
openai.error.Timeout,
|
||||
openai.error.APIError,
|
||||
openai.error.APIConnectionError,
|
||||
openai.error.RateLimitError,
|
||||
openai.error.ServiceUnavailableError,
|
||||
]
|
||||
return create_base_retry_decorator(
|
||||
error_types=errors, max_retries=llm.max_retries, run_manager=run_manager
|
||||
@ -358,7 +358,7 @@ class ChatOpenAI(BaseChatModel):
|
||||
**client_params
|
||||
).chat.completions
|
||||
elif not values.get("client"):
|
||||
values["client"] = openai.ChatCompletion # type: ignore[attr-defined]
|
||||
values["client"] = openai.ChatCompletion
|
||||
else:
|
||||
pass
|
||||
return values
|
||||
@ -595,7 +595,7 @@ class ChatOpenAI(BaseChatModel):
|
||||
if self.openai_proxy:
|
||||
import openai
|
||||
|
||||
openai.proxy = {"http": self.openai_proxy, "https": self.openai_proxy} # type: ignore[attr-defined]
|
||||
openai.proxy = {"http": self.openai_proxy, "https": self.openai_proxy}
|
||||
return {**self._default_params, **openai_creds}
|
||||
|
||||
def _get_invocation_params(
|
||||
|
@ -486,7 +486,7 @@ class ChatPerplexity(BaseChatModel):
|
||||
if is_pydantic_schema and hasattr(
|
||||
schema, "model_json_schema"
|
||||
): # accounting for pydantic v1 and v2
|
||||
response_format = schema.model_json_schema() # type: ignore[union-attr]
|
||||
response_format = schema.model_json_schema()
|
||||
elif is_pydantic_schema:
|
||||
response_format = schema.schema() # type: ignore[union-attr]
|
||||
elif isinstance(schema, dict):
|
||||
|
@ -636,7 +636,7 @@ class ChatSambaNovaCloud(BaseChatModel):
|
||||
if is_pydantic_schema:
|
||||
output_parser: OutputParserLike[Any] = PydanticToolsParser(
|
||||
tools=[schema], # type: ignore[list-item]
|
||||
first_tool_only=True, # type: ignore[list-item]
|
||||
first_tool_only=True,
|
||||
)
|
||||
else:
|
||||
output_parser = JsonOutputKeyToolsParser(
|
||||
@ -648,7 +648,7 @@ class ChatSambaNovaCloud(BaseChatModel):
|
||||
# llm = self.bind(response_format={"type": "json_object"})
|
||||
if is_pydantic_schema:
|
||||
schema = cast(Type[BaseModel], schema)
|
||||
output_parser = PydanticOutputParser(pydantic_object=schema) # type: ignore[type-var, arg-type]
|
||||
output_parser = PydanticOutputParser(pydantic_object=schema)
|
||||
else:
|
||||
output_parser = JsonOutputParser()
|
||||
|
||||
@ -666,7 +666,7 @@ class ChatSambaNovaCloud(BaseChatModel):
|
||||
# )
|
||||
if is_pydantic_schema:
|
||||
schema = cast(Type[BaseModel], schema)
|
||||
output_parser = PydanticOutputParser(pydantic_object=schema) # type: ignore[type-var, arg-type]
|
||||
output_parser = PydanticOutputParser(pydantic_object=schema)
|
||||
else:
|
||||
output_parser = JsonOutputParser()
|
||||
else:
|
||||
|
@ -13,7 +13,7 @@ from langchain_community.llms.solar import SOLAR_SERVICE_URL_BASE, SolarCommon
|
||||
@deprecated( # type: ignore[arg-type]
|
||||
since="0.0.34", removal="1.0", alternative_import="langchain_upstage.ChatUpstage"
|
||||
)
|
||||
class SolarChat(SolarCommon, ChatOpenAI): # type: ignore[override, override]
|
||||
class SolarChat(SolarCommon, ChatOpenAI):
|
||||
"""Wrapper around Solar large language models.
|
||||
To use, you should have the ``openai`` python package installed, and the
|
||||
environment variable ``SOLAR_API_KEY`` set with your API key.
|
||||
|
@ -176,7 +176,7 @@ class ChatNebula(BaseChatModel):
|
||||
json_payload = json.dumps(payload)
|
||||
|
||||
async with ClientSession() as session:
|
||||
async with session.post( # type: ignore[call-arg]
|
||||
async with session.post( # type: ignore[call-arg,unused-ignore]
|
||||
url, data=json_payload, headers=headers, stream=True
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
|
@ -140,7 +140,7 @@ def convert_dict_to_message(
|
||||
else AIMessage(
|
||||
content=content,
|
||||
additional_kwargs=additional_kwargs,
|
||||
tool_calls=tool_calls, # type: ignore[arg-type]
|
||||
tool_calls=tool_calls,
|
||||
invalid_tool_calls=invalid_tool_calls,
|
||||
)
|
||||
)
|
||||
@ -163,7 +163,7 @@ def convert_dict_to_message(
|
||||
if is_chunk
|
||||
else ToolMessage(
|
||||
content=_dict.get("content", ""),
|
||||
tool_call_id=_dict.get("tool_call_id"), # type: ignore[arg-type]
|
||||
tool_call_id=_dict.get("tool_call_id"),
|
||||
additional_kwargs=additional_kwargs,
|
||||
)
|
||||
)
|
||||
@ -894,7 +894,7 @@ class ChatTongyi(BaseChatModel):
|
||||
if is_pydantic_schema:
|
||||
output_parser: OutputParserLike = PydanticToolsParser(
|
||||
tools=[schema], # type: ignore[list-item]
|
||||
first_tool_only=True, # type: ignore[list-item]
|
||||
first_tool_only=True,
|
||||
)
|
||||
else:
|
||||
key_name = convert_to_openai_tool(schema)["function"]["name"]
|
||||
|
@ -209,7 +209,7 @@ def _get_question(messages: List[BaseMessage]) -> HumanMessage:
|
||||
removal="1.0",
|
||||
alternative_import="langchain_google_vertexai.ChatVertexAI",
|
||||
)
|
||||
class ChatVertexAI(_VertexAICommon, BaseChatModel): # type: ignore[override]
|
||||
class ChatVertexAI(_VertexAICommon, BaseChatModel):
|
||||
"""`Vertex AI` Chat large language models API."""
|
||||
|
||||
model_name: str = "chat-bison"
|
||||
|
@ -162,7 +162,7 @@ def _convert_dict_to_message(dct: Dict[str, Any]) -> BaseMessage:
|
||||
additional_kwargs["name"] = dct["name"]
|
||||
return ToolMessage(
|
||||
content=content,
|
||||
tool_call_id=dct.get("tool_call_id"), # type: ignore[arg-type]
|
||||
tool_call_id=dct.get("tool_call_id"),
|
||||
additional_kwargs=additional_kwargs,
|
||||
)
|
||||
return ChatMessage(role=role, content=content) # type: ignore[arg-type]
|
||||
@ -861,7 +861,7 @@ class ChatZhipuAI(BaseChatModel):
|
||||
if is_pydantic_schema:
|
||||
output_parser: OutputParserLike = PydanticToolsParser(
|
||||
tools=[schema], # type: ignore[list-item]
|
||||
first_tool_only=True, # type: ignore[list-item]
|
||||
first_tool_only=True,
|
||||
)
|
||||
else:
|
||||
output_parser = JsonOutputKeyToolsParser(
|
||||
|
@ -226,8 +226,8 @@ class AsyncHtmlLoader(BaseLoader):
|
||||
# in a separate loop, in a separate thread.
|
||||
with ThreadPoolExecutor(max_workers=1) as executor:
|
||||
future: Future[List[str]] = executor.submit(
|
||||
asyncio.run, # type: ignore[arg-type]
|
||||
self.fetch_all(self.web_paths), # type: ignore[arg-type]
|
||||
asyncio.run,
|
||||
self.fetch_all(self.web_paths),
|
||||
)
|
||||
results = future.result()
|
||||
except RuntimeError:
|
||||
|
@ -224,7 +224,7 @@ class CloudBlobLoader(BlobLoader):
|
||||
yield self.path
|
||||
return
|
||||
|
||||
paths = self.path.glob(self.glob) # type: ignore[attr-defined]
|
||||
paths = self.path.glob(self.glob)
|
||||
for path in paths:
|
||||
if self.exclude:
|
||||
if any(path.match(glob) for glob in self.exclude):
|
||||
|
@ -1,4 +1,4 @@
|
||||
from typing import Iterator, Optional, Sequence
|
||||
from typing import Any, Dict, Iterator, Optional, Sequence
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
@ -8,7 +8,7 @@ from langchain_community.document_loaders.base import BaseLoader
|
||||
class BrowserbaseLoader(BaseLoader):
|
||||
"""Load pre-rendered web pages using a headless browser hosted on Browserbase.
|
||||
|
||||
Depends on `browserbase` package.
|
||||
Depends on `browserbase` and `playwright` packages.
|
||||
Get your API key from https://browserbase.com
|
||||
"""
|
||||
|
||||
@ -24,6 +24,7 @@ class BrowserbaseLoader(BaseLoader):
|
||||
self.urls = urls
|
||||
self.text_content = text_content
|
||||
self.session_id = session_id
|
||||
self.project_id = project_id
|
||||
self.proxy = proxy
|
||||
|
||||
try:
|
||||
@ -32,22 +33,57 @@ class BrowserbaseLoader(BaseLoader):
|
||||
raise ImportError(
|
||||
"You must run "
|
||||
"`pip install --upgrade "
|
||||
"browserbase` "
|
||||
"browserbase playwright` "
|
||||
"to use the Browserbase loader."
|
||||
)
|
||||
|
||||
self.browserbase = Browserbase(api_key, project_id)
|
||||
self.browserbase = Browserbase(api_key=api_key)
|
||||
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
"""Load pages from URLs"""
|
||||
pages = self.browserbase.load_urls(
|
||||
self.urls, self.text_content, self.session_id, self.proxy
|
||||
)
|
||||
|
||||
for i, page in enumerate(pages):
|
||||
yield Document(
|
||||
page_content=page,
|
||||
metadata={
|
||||
"url": self.urls[i],
|
||||
},
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"playwright is required for BrowserbaseLoader. "
|
||||
"Please run `pip install --upgrade playwright`."
|
||||
)
|
||||
|
||||
for url in self.urls:
|
||||
with sync_playwright() as playwright:
|
||||
# Create or use existing session
|
||||
if self.session_id:
|
||||
session = self.browserbase.sessions.retrieve(id=self.session_id)
|
||||
else:
|
||||
if not self.project_id:
|
||||
raise ValueError("project_id is required to create a session")
|
||||
session_params: Dict[str, Any] = {"project_id": self.project_id}
|
||||
if self.proxy is not None:
|
||||
session_params["proxy"] = bool(self.proxy)
|
||||
session = self.browserbase.sessions.create(**session_params)
|
||||
|
||||
# Connect to the remote session
|
||||
browser = playwright.chromium.connect_over_cdp(session.connect_url)
|
||||
context = browser.contexts[0]
|
||||
page = context.pages[0]
|
||||
|
||||
# Navigate to URL and get content
|
||||
page.goto(url)
|
||||
# Get content based on the text_content flag
|
||||
if self.text_content:
|
||||
page_text = page.inner_text("body")
|
||||
content = str(page_text)
|
||||
else:
|
||||
page_html = page.content()
|
||||
content = str(page_html)
|
||||
|
||||
# Close browser
|
||||
page.close()
|
||||
browser.close()
|
||||
|
||||
yield Document(
|
||||
page_content=content,
|
||||
metadata={
|
||||
"url": url,
|
||||
},
|
||||
)
|
||||
|
@ -24,9 +24,9 @@ class ConcurrentLoader(GenericLoader):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
blob_loader: BlobLoader, # type: ignore[valid-type]
|
||||
blob_loader: BlobLoader,
|
||||
blob_parser: BaseBlobParser,
|
||||
num_workers: int = 4, # type: ignore[valid-type]
|
||||
num_workers: int = 4,
|
||||
) -> None:
|
||||
super().__init__(blob_loader, blob_parser)
|
||||
self.num_workers = num_workers
|
||||
@ -40,7 +40,7 @@ class ConcurrentLoader(GenericLoader):
|
||||
) as executor:
|
||||
futures = {
|
||||
executor.submit(self.blob_parser.lazy_parse, blob)
|
||||
for blob in self.blob_loader.yield_blobs() # type: ignore[attr-defined]
|
||||
for blob in self.blob_loader.yield_blobs()
|
||||
}
|
||||
for future in concurrent.futures.as_completed(futures):
|
||||
yield from future.result()
|
||||
@ -72,7 +72,7 @@ class ConcurrentLoader(GenericLoader):
|
||||
num_workers: Max number of concurrent workers to use.
|
||||
parser_kwargs: Keyword arguments to pass to the parser.
|
||||
"""
|
||||
blob_loader = FileSystemBlobLoader( # type: ignore[attr-defined, misc]
|
||||
blob_loader = FileSystemBlobLoader(
|
||||
path,
|
||||
glob=glob,
|
||||
exclude=exclude,
|
||||
|
@ -428,7 +428,7 @@ class ConfluenceLoader(BaseLoader):
|
||||
self.number_of_retries # type: ignore[arg-type]
|
||||
),
|
||||
wait=wait_exponential(
|
||||
multiplier=1, # type: ignore[arg-type]
|
||||
multiplier=1,
|
||||
min=self.min_retry_seconds, # type: ignore[arg-type]
|
||||
max=self.max_retry_seconds, # type: ignore[arg-type]
|
||||
),
|
||||
|
@ -223,4 +223,4 @@ class UnstructuredCSVLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.csv import partition_csv
|
||||
|
||||
return partition_csv(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_csv(filename=self.file_path, **self.unstructured_kwargs)
|
||||
|
@ -101,7 +101,7 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
|
||||
self.url_path = url_path
|
||||
self.bytes_source = bytes_source
|
||||
|
||||
self.parser = AzureAIDocumentIntelligenceParser( # type: ignore[misc]
|
||||
self.parser = AzureAIDocumentIntelligenceParser(
|
||||
api_endpoint=api_endpoint,
|
||||
api_key=api_key,
|
||||
api_version=api_version,
|
||||
@ -116,10 +116,10 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
|
||||
) -> Iterator[Document]:
|
||||
"""Lazy load the document as pages."""
|
||||
if self.file_path is not None:
|
||||
blob = Blob.from_path(self.file_path) # type: ignore[attr-defined]
|
||||
blob = Blob.from_path(self.file_path)
|
||||
yield from self.parser.parse(blob)
|
||||
elif self.url_path is not None:
|
||||
yield from self.parser.parse_url(self.url_path) # type: ignore[arg-type]
|
||||
yield from self.parser.parse_url(self.url_path)
|
||||
elif self.bytes_source is not None:
|
||||
yield from self.parser.parse_bytes(self.bytes_source)
|
||||
else:
|
||||
|
@ -60,16 +60,16 @@ class UnstructuredEmailLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.file_utils.filetype import FileType, detect_filetype
|
||||
|
||||
filetype = detect_filetype(self.file_path) # type: ignore[arg-type]
|
||||
filetype = detect_filetype(self.file_path)
|
||||
|
||||
if filetype == FileType.EML:
|
||||
from unstructured.partition.email import partition_email
|
||||
|
||||
return partition_email(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_email(filename=self.file_path, **self.unstructured_kwargs)
|
||||
elif satisfies_min_unstructured_version("0.5.8") and filetype == FileType.MSG:
|
||||
from unstructured.partition.msg import partition_msg
|
||||
|
||||
return partition_msg(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_msg(filename=self.file_path, **self.unstructured_kwargs)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Filetype {filetype} is not supported in UnstructuredEmailLoader."
|
||||
|
@ -52,4 +52,4 @@ class UnstructuredEPubLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.epub import partition_epub
|
||||
|
||||
return partition_epub(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_epub(filename=self.file_path, **self.unstructured_kwargs)
|
||||
|
@ -49,4 +49,4 @@ class UnstructuredExcelLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.xlsx import partition_xlsx
|
||||
|
||||
return partition_xlsx(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_xlsx(filename=self.file_path, **self.unstructured_kwargs)
|
||||
|
@ -96,7 +96,7 @@ class GenericLoader(BaseLoader):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
blob_loader: BlobLoader, # type: ignore[valid-type]
|
||||
blob_loader: BlobLoader,
|
||||
blob_parser: BaseBlobParser,
|
||||
) -> None:
|
||||
"""A generic document loader.
|
||||
@ -112,7 +112,7 @@ class GenericLoader(BaseLoader):
|
||||
self,
|
||||
) -> Iterator[Document]:
|
||||
"""Load documents lazily. Use this when working at a large scale."""
|
||||
for blob in self.blob_loader.yield_blobs(): # type: ignore[attr-defined]
|
||||
for blob in self.blob_loader.yield_blobs():
|
||||
yield from self.blob_parser.lazy_parse(blob)
|
||||
|
||||
def load_and_split(
|
||||
@ -159,7 +159,7 @@ class GenericLoader(BaseLoader):
|
||||
Returns:
|
||||
A generic document loader.
|
||||
"""
|
||||
blob_loader = FileSystemBlobLoader( # type: ignore[attr-defined, misc]
|
||||
blob_loader = FileSystemBlobLoader(
|
||||
path,
|
||||
glob=glob,
|
||||
exclude=exclude,
|
||||
|
@ -74,7 +74,7 @@ class GitLoader(BaseLoader):
|
||||
|
||||
file_path = os.path.join(self.repo_path, item.path)
|
||||
|
||||
ignored_files = repo.ignored([file_path]) # type: ignore[arg-type]
|
||||
ignored_files = repo.ignored([file_path])
|
||||
if len(ignored_files):
|
||||
continue
|
||||
|
||||
|
@ -48,4 +48,4 @@ class UnstructuredHTMLLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.html import partition_html
|
||||
|
||||
return partition_html(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_html(filename=self.file_path, **self.unstructured_kwargs)
|
||||
|
@ -48,4 +48,4 @@ class UnstructuredImageLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.image import partition_image
|
||||
|
||||
return partition_image(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_image(filename=self.file_path, **self.unstructured_kwargs)
|
||||
|
@ -76,13 +76,13 @@ class ImageCaptionLoader(BaseLoader):
|
||||
|
||||
try:
|
||||
if isinstance(image, bytes):
|
||||
image = Image.open(BytesIO(image)).convert("RGB") # type: ignore[assignment]
|
||||
image = Image.open(BytesIO(image)).convert("RGB")
|
||||
elif isinstance(image, str) and (
|
||||
image.startswith("http://") or image.startswith("https://")
|
||||
):
|
||||
image = Image.open(requests.get(image, stream=True).raw).convert("RGB") # type: ignore[assignment, arg-type]
|
||||
image = Image.open(requests.get(image, stream=True).raw).convert("RGB")
|
||||
else:
|
||||
image = Image.open(image).convert("RGB") # type: ignore[assignment]
|
||||
image = Image.open(image).convert("RGB")
|
||||
except Exception:
|
||||
if isinstance(image_source, bytes):
|
||||
msg = "Could not get image data from bytes"
|
||||
|
@ -93,4 +93,4 @@ class UnstructuredMarkdownLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.md import partition_md
|
||||
|
||||
return partition_md(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_md(filename=self.file_path, **self.unstructured_kwargs)
|
||||
|
@ -52,4 +52,4 @@ class UnstructuredODTLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.odt import partition_odt
|
||||
|
||||
return partition_odt(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_odt(filename=self.file_path, **self.unstructured_kwargs)
|
||||
|
@ -52,4 +52,4 @@ class UnstructuredOrgModeLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.org import partition_org
|
||||
|
||||
return partition_org(filename=self.file_path, **self.unstructured_kwargs) # type: ignore[arg-type]
|
||||
return partition_org(filename=self.file_path, **self.unstructured_kwargs)
|
||||
|
@ -322,7 +322,7 @@ class OpenAIWhisperParser(BaseBlobParser):
|
||||
model=self.model, file=file_obj, **self._create_params
|
||||
)
|
||||
else:
|
||||
transcript = openai.Audio.transcribe(self.model, file_obj) # type: ignore[attr-defined]
|
||||
transcript = openai.Audio.transcribe(self.model, file_obj)
|
||||
break
|
||||
except Exception as e:
|
||||
attempts += 1
|
||||
|
@ -9,7 +9,7 @@ from langchain_community.document_loaders.blob_loaders import Blob
|
||||
class MsWordParser(BaseBlobParser):
|
||||
"""Parse the Microsoft Word documents from a blob."""
|
||||
|
||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]: # type: ignore[valid-type]
|
||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
||||
"""Parse a Microsoft Word document into the Document iterator.
|
||||
|
||||
Args:
|
||||
@ -33,13 +33,13 @@ class MsWordParser(BaseBlobParser):
|
||||
partition_docx
|
||||
),
|
||||
}
|
||||
if blob.mimetype not in ( # type: ignore[attr-defined]
|
||||
if blob.mimetype not in (
|
||||
"application/msword",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
):
|
||||
raise ValueError("This blob type is not supported for this parser.")
|
||||
with blob.as_bytes_io() as word_document: # type: ignore[attr-defined]
|
||||
elements = mime_type_parser[blob.mimetype](file=word_document) # type: ignore[attr-defined] # type: ignore[operator] # type: ignore[operator] # type: ignore[operator] # type: ignore[operator] # type: ignore[operator] # type: ignore[operator]
|
||||
with blob.as_bytes_io() as word_document:
|
||||
elements = mime_type_parser[blob.mimetype](file=word_document)
|
||||
text = "\n\n".join([str(el) for el in elements])
|
||||
metadata = {"source": blob.source} # type: ignore[attr-defined]
|
||||
metadata = {"source": blob.source}
|
||||
yield Document(page_content=text, metadata=metadata)
|
||||
|
@ -340,7 +340,7 @@ class PyPDFParser(BaseBlobParser):
|
||||
self.extraction_mode = extraction_mode
|
||||
self.extraction_kwargs = extraction_kwargs or {}
|
||||
|
||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]: # type: ignore[valid-type]
|
||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
||||
"""
|
||||
Lazily parse the blob.
|
||||
Insert image, if possible, between two paragraphs.
|
||||
@ -380,7 +380,7 @@ class PyPDFParser(BaseBlobParser):
|
||||
**self.extraction_kwargs,
|
||||
)
|
||||
|
||||
with blob.as_bytes_io() as pdf_file_obj: # type: ignore[attr-defined]
|
||||
with blob.as_bytes_io() as pdf_file_obj:
|
||||
pdf_reader = pypdf.PdfReader(pdf_file_obj, password=self.password)
|
||||
|
||||
doc_metadata = _purge_metadata(
|
||||
@ -434,7 +434,7 @@ class PyPDFParser(BaseBlobParser):
|
||||
if "/XObject" not in cast(dict, page["/Resources"]).keys():
|
||||
return ""
|
||||
|
||||
xObject = page["/Resources"]["/XObject"].get_object() # type: ignore[index]
|
||||
xObject = page["/Resources"]["/XObject"].get_object()
|
||||
images = []
|
||||
for obj in xObject:
|
||||
np_image: Any = None
|
||||
@@ -677,7 +677,7 @@ class PDFMinerParser(BaseBlobParser):

         return metadata

-    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """
         Lazily parse the blob.
         Insert image, if possible, between two paragraphs.
@@ -919,7 +919,7 @@ class PyMuPDFParser(BaseBlobParser):
         self.extract_tables = extract_tables
         self.extract_tables_settings = extract_tables_settings

-    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         return self._lazy_parse(
             blob,
         )
@@ -930,7 +930,7 @@ class PyMuPDFParser(BaseBlobParser):
         # text-kwargs is present for backwards compatibility.
         # Users should not use it directly.
         text_kwargs: Optional[dict[str, Any]] = None,
-    ) -> Iterator[Document]:  # type: ignore[valid-type]
+    ) -> Iterator[Document]:
         """Lazily parse the blob.
         Insert image, if possible, between two paragraphs.
         In this way, a paragraph can be continued on the next page.
@@ -990,8 +990,8 @@ class PyMuPDFParser(BaseBlobParser):
             )

         with PyMuPDFParser._lock:
-            with blob.as_bytes_io() as file_path:  # type: ignore[attr-defined]
-                if blob.data is None:  # type: ignore[attr-defined]
+            with blob.as_bytes_io() as file_path:
+                if blob.data is None:
                     doc = pymupdf.open(file_path)
                 else:
                     doc = pymupdf.open(stream=file_path, filetype="pdf")
@@ -1066,8 +1066,8 @@ class PyMuPDFParser(BaseBlobParser):
                     "producer": "PyMuPDF",
                     "creator": "PyMuPDF",
                     "creationdate": "",
-                    "source": blob.source,  # type: ignore[attr-defined]
-                    "file_path": blob.source,  # type: ignore[attr-defined]
+                    "source": blob.source,
+                    "file_path": blob.source,
                     "total_pages": len(doc),
                 },
                 **{
@@ -1273,7 +1273,7 @@ class PyPDFium2Parser(BaseBlobParser):
         self.mode = mode
         self.pages_delimiter = pages_delimiter

-    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """
         Lazily parse the blob.
         Insert image, if possible, between two paragraphs.
@@ -1299,7 +1299,7 @@ class PyPDFium2Parser(BaseBlobParser):
         # pypdfium2 is really finicky with respect to closing things,
         # if done incorrectly creates seg faults.
         with PyPDFium2Parser._lock:
-            with blob.as_bytes_io() as file_path:  # type: ignore[attr-defined]
+            with blob.as_bytes_io() as file_path:
                 pdf_reader = None
                 try:
                     pdf_reader = pypdfium2.PdfDocument(
@@ -1410,11 +1410,11 @@ class PDFPlumberParser(BaseBlobParser):
         self.dedupe = dedupe
         self.extract_images = extract_images

-    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """Lazily parse the blob."""
         import pdfplumber

-        with blob.as_bytes_io() as file_path:  # type: ignore[attr-defined]
+        with blob.as_bytes_io() as file_path:
             doc = pdfplumber.open(file_path)  # open document

             yield from [
@@ -1424,8 +1424,8 @@ class PDFPlumberParser(BaseBlobParser):
                 + self._extract_images_from_page(page),
                 metadata=dict(
                     {
                        "source": blob.source,  # type: ignore[attr-defined]
                        "file_path": blob.source,  # type: ignore[attr-defined]
+                        "source": blob.source,
+                        "file_path": blob.source,
                         "page": page.page_number - 1,
                         "total_pages": len(doc.pages),
                     },
@@ -1593,14 +1593,14 @@ class AmazonTextractPDFParser(BaseBlobParser):
         else:
             self.boto3_textract_client = client

-    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """Iterates over the Blob pages and returns an Iterator with a Document
         for each page, like the other parsers If multi-page document, blob.path
         has to be set to the S3 URI and for single page docs
         the blob.data is taken
         """

-        url_parse_result = urlparse(str(blob.path)) if blob.path else None  # type: ignore[attr-defined]
+        url_parse_result = urlparse(str(blob.path)) if blob.path else None
         # Either call with S3 path (multi-page) or with bytes (single-page)
         if (
             url_parse_result
@@ -1608,13 +1608,13 @@ class AmazonTextractPDFParser(BaseBlobParser):
             and url_parse_result.netloc
         ):
             textract_response_json = self.tc.call_textract(
-                input_document=str(blob.path),  # type: ignore[attr-defined]
+                input_document=str(blob.path),
                 features=self.textract_features,
                 boto3_textract_client=self.boto3_textract_client,
             )
         else:
             textract_response_json = self.tc.call_textract(
-                input_document=blob.as_bytes(),  # type: ignore[attr-defined]
+                input_document=blob.as_bytes(),
                 features=self.textract_features,
                 call_mode=self.tc.Textract_Call_Mode.FORCE_SYNC,
                 boto3_textract_client=self.boto3_textract_client,
@@ -1625,7 +1625,7 @@ class AmazonTextractPDFParser(BaseBlobParser):
         for idx, page in enumerate(document.pages):
             yield Document(
                 page_content=page.get_text(config=self.linearization_config),
-                metadata={"source": blob.source, "page": idx + 1},  # type: ignore[attr-defined]
+                metadata={"source": blob.source, "page": idx + 1},
             )


@@ -1645,23 +1645,23 @@ class DocumentIntelligenceParser(BaseBlobParser):
         self.client = client
         self.model = model

-    def _generate_docs(self, blob: Blob, result: Any) -> Iterator[Document]:  # type: ignore[valid-type]
+    def _generate_docs(self, blob: Blob, result: Any) -> Iterator[Document]:
         for p in result.pages:
             content = " ".join([line.content for line in p.lines])

             d = Document(
                 page_content=content,
                 metadata={
-                    "source": blob.source,  # type: ignore[attr-defined]
+                    "source": blob.source,
                     "page": p.page_number,
                 },
             )
             yield d

-    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """Lazily parse the blob."""

-        with blob.as_bytes_io() as file_obj:  # type: ignore[attr-defined]
+        with blob.as_bytes_io() as file_obj:
             poller = self.client.begin_analyze_document(self.model, file_obj)
             result = poller.result()

@@ -11,6 +11,6 @@ from langchain_community.document_loaders.blob_loaders import Blob
 class TextParser(BaseBlobParser):
     """Parser for text blobs."""

-    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """Lazily parse the blob."""
-        yield Document(page_content=blob.as_string(), metadata={"source": blob.source})  # type: ignore[attr-defined]
+        yield Document(page_content=blob.as_string(), metadata={"source": blob.source})
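TextParser is the simplest of these blob parsers: one Document per blob, with the blob's source carried into metadata. A small sketch (the inline data is illustrative):

from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers.txt import TextParser

# Build a blob from in-memory data; `path` becomes `blob.source`.
blob = Blob.from_data("hello world", path="example.txt")
docs = list(TextParser().lazy_parse(blob))
assert docs[0].page_content == "hello world"
assert docs[0].metadata == {"source": "example.txt"}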
@@ -91,7 +91,7 @@ class UnstructuredPDFLoader(UnstructuredFileLoader):
     def _get_elements(self) -> list:
         from unstructured.partition.pdf import partition_pdf

-        return partition_pdf(filename=self.file_path, **self.unstructured_kwargs)  # type: ignore[arg-type]
+        return partition_pdf(filename=self.file_path, **self.unstructured_kwargs)


 class BasePDFLoader(BaseLoader, ABC):
@@ -299,11 +299,9 @@ class PyPDFLoader(BasePDFLoader):
         In this way, a paragraph can be continued on the next page.
         """
         if self.web_path:
-            blob = Blob.from_data(  # type: ignore[attr-defined]
-                open(self.file_path, "rb").read(), path=self.web_path
-            )
+            blob = Blob.from_data(open(self.file_path, "rb").read(), path=self.web_path)
         else:
-            blob = Blob.from_path(self.file_path)  # type: ignore[attr-defined]
+            blob = Blob.from_path(self.file_path)
         yield from self.parser.lazy_parse(blob)


@@ -415,11 +413,9 @@ class PyPDFium2Loader(BasePDFLoader):
         In this way, a paragraph can be continued on the next page.
         """
         if self.web_path:
-            blob = Blob.from_data(  # type: ignore[attr-defined]
-                open(self.file_path, "rb").read(), path=self.web_path
-            )
+            blob = Blob.from_data(open(self.file_path, "rb").read(), path=self.web_path)
         else:
-            blob = Blob.from_path(self.file_path)  # type: ignore[attr-defined]
+            blob = Blob.from_path(self.file_path)
         yield from self.parser.parse(blob)


@@ -674,11 +670,9 @@ class PDFMinerLoader(BasePDFLoader):
         In this way, a paragraph can be continued on the next page.
         """
         if self.web_path:
-            blob = Blob.from_data(  # type: ignore[attr-defined]
-                open(self.file_path, "rb").read(), path=self.web_path
-            )
+            blob = Blob.from_data(open(self.file_path, "rb").read(), path=self.web_path)
         else:
-            blob = Blob.from_path(self.file_path)  # type: ignore[attr-defined]
+            blob = Blob.from_path(self.file_path)
         yield from self.parser.lazy_parse(blob)


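All three loader hunks above collapse the same `Blob.from_data(...)` call onto one line and drop the ignores; the load path itself (build a `Blob`, hand it to the parser) is unchanged. A sketch of that path (the file name is hypothetical):

from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("example.pdf")
# lazy_load builds a Blob from the local file (or from downloaded bytes
# when constructed from a URL) and defers to the underlying PyPDFParser.
for doc in loader.lazy_load():
    print(doc.metadata.get("page"), len(doc.page_content))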
@@ -850,9 +844,9 @@ class PyMuPDFLoader(BasePDFLoader):
         )
         parser = self.parser
         if self.web_path:
-            blob = Blob.from_data(open(self.file_path, "rb").read(), path=self.web_path)  # type: ignore[attr-defined]
+            blob = Blob.from_data(open(self.file_path, "rb").read(), path=self.web_path)
         else:
-            blob = Blob.from_path(self.file_path)  # type: ignore[attr-defined]
+            blob = Blob.from_path(self.file_path)
         yield from parser._lazy_parse(blob, text_kwargs=kwargs)

     def load(self, **kwargs: Any) -> list[Document]:
@@ -1046,9 +1040,9 @@ class PDFPlumberLoader(BasePDFLoader):
             extract_images=self.extract_images,
         )
         if self.web_path:
-            blob = Blob.from_data(open(self.file_path, "rb").read(), path=self.web_path)  # type: ignore[attr-defined]
+            blob = Blob.from_data(open(self.file_path, "rb").read(), path=self.web_path)
         else:
-            blob = Blob.from_path(self.file_path)  # type: ignore[attr-defined]
+            blob = Blob.from_path(self.file_path)
         return parser.parse(blob)


@@ -1163,7 +1157,7 @@ class AmazonTextractPDFLoader(BasePDFLoader):
         # raises ValueError when multipage and not on S3"""

         if self.web_path and self._is_s3_url(self.web_path):
-            blob = Blob(path=self.web_path)  # type: ignore[call-arg] # type: ignore[misc]
+            blob = Blob(path=self.web_path)
         else:
             blob = Blob.from_path(self.file_path)
         if AmazonTextractPDFLoader._get_number_of_pages(blob) > 1:
@@ -1176,7 +1170,7 @@ class AmazonTextractPDFLoader(BasePDFLoader):
         yield from self.parser.parse(blob)

     @staticmethod
-    def _get_number_of_pages(blob: Blob) -> int:  # type: ignore[valid-type]
+    def _get_number_of_pages(blob: Blob) -> int:
         try:
             import pypdf
             from PIL import Image, ImageSequence
@@ -1186,22 +1180,20 @@ class AmazonTextractPDFLoader(BasePDFLoader):
                 "Could not import pypdf or Pillow python package. "
                 "Please install it with `pip install pypdf Pillow`."
             )
-        if blob.mimetype == "application/pdf":  # type: ignore[attr-defined]
-            with blob.as_bytes_io() as input_pdf_file:  # type: ignore[attr-defined]
+        if blob.mimetype == "application/pdf":
+            with blob.as_bytes_io() as input_pdf_file:
                 pdf_reader = pypdf.PdfReader(input_pdf_file)
                 return len(pdf_reader.pages)
-        elif blob.mimetype == "image/tiff":  # type: ignore[attr-defined]
+        elif blob.mimetype == "image/tiff":
             num_pages = 0
-            img = Image.open(blob.as_bytes())  # type: ignore[attr-defined]
+            img = Image.open(blob.as_bytes())
             for _, _ in enumerate(ImageSequence.Iterator(img)):
                 num_pages += 1
             return num_pages
-        elif blob.mimetype in ["image/png", "image/jpeg"]:  # type: ignore[attr-defined]
+        elif blob.mimetype in ["image/png", "image/jpeg"]:
             return 1
         else:
-            raise ValueError(  # type: ignore[attr-defined]
-                f"unsupported mime type: {blob.mimetype}"
-            )
+            raise ValueError(f"unsupported mime type: {blob.mimetype}")


 class DedocPDFLoader(DedocBaseLoader):
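For reference, the TIFF branch of `_get_number_of_pages` counts frames with PIL's `ImageSequence`. A standalone sketch of that idiom (the path is hypothetical):

from PIL import Image, ImageSequence

# Each frame of a multipage TIFF counts as one page for Textract.
with Image.open("scan.tiff") as img:
    num_pages = sum(1 for _ in ImageSequence.Iterator(img))
print(num_pages)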
@@ -1348,7 +1340,7 @@ class DocumentIntelligenceLoader(BasePDFLoader):
         self,
     ) -> Iterator[Document]:
         """Lazy load given path as pages."""
-        blob = Blob.from_path(self.file_path)  # type: ignore[attr-defined]
+        blob = Blob.from_path(self.file_path)
         yield from self.parser.parse(blob)


@@ -59,7 +59,7 @@ class UnstructuredPowerPointLoader(UnstructuredFileLoader):
         try:
             import magic  # noqa: F401

-            is_ppt = detect_filetype(self.file_path) == FileType.PPT  # type: ignore[arg-type]
+            is_ppt = detect_filetype(self.file_path) == FileType.PPT
         except ImportError:
             _, extension = os.path.splitext(str(self.file_path))
             is_ppt = extension == ".ppt"
@@ -70,8 +70,8 @@ class UnstructuredPowerPointLoader(UnstructuredFileLoader):
         if is_ppt:
             from unstructured.partition.ppt import partition_ppt

-            return partition_ppt(filename=self.file_path, **self.unstructured_kwargs)  # type: ignore[arg-type]
+            return partition_ppt(filename=self.file_path, **self.unstructured_kwargs)
         else:
             from unstructured.partition.pptx import partition_pptx

-            return partition_pptx(filename=self.file_path, **self.unstructured_kwargs)  # type: ignore[arg-type]
+            return partition_pptx(filename=self.file_path, **self.unstructured_kwargs)
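The pattern in the first PowerPoint hunk, preferring libmagic-based `detect_filetype` when `python-magic` is importable and falling back to the file extension otherwise, recurs in the Word loader further below. A standalone sketch of that fallback (the path is hypothetical):

import os

def is_legacy_ppt(file_path: str) -> bool:
    try:
        import magic  # noqa: F401  # only to verify libmagic is available
        from unstructured.file_utils.filetype import FileType, detect_filetype

        return detect_filetype(file_path) == FileType.PPT
    except ImportError:
        _, extension = os.path.splitext(str(file_path))
        return extension == ".ppt"

print(is_legacy_ppt("slides.ppt"))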
@@ -56,4 +56,4 @@ class UnstructuredRSTLoader(UnstructuredFileLoader):
     def _get_elements(self) -> List:
         from unstructured.partition.rst import partition_rst

-        return partition_rst(filename=self.file_path, **self.unstructured_kwargs)  # type: ignore[arg-type]
+        return partition_rst(filename=self.file_path, **self.unstructured_kwargs)

@@ -56,4 +56,4 @@ class UnstructuredRTFLoader(UnstructuredFileLoader):
     def _get_elements(self) -> List:
         from unstructured.partition.rtf import partition_rtf

-        return partition_rtf(filename=self.file_path, **self.unstructured_kwargs)  # type: ignore[arg-type]
+        return partition_rtf(filename=self.file_path, **self.unstructured_kwargs)
@@ -185,7 +185,7 @@ class SitemapLoader(WebBaseLoader):

             els.append(
                 {
-                    tag: prop.text
+                    tag: prop.text.strip()
                     for tag in ["loc", "lastmod", "changefreq", "priority"]
                     if (prop := url.find(tag))
                 }
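The `.strip()` added here matters because sitemap XML is usually pretty-printed, so the text of a `<loc>` node carries its surrounding newline and indentation. A standalone illustration with BeautifulSoup, the same parsing stack SitemapLoader relies on (the URL is illustrative):

from bs4 import BeautifulSoup  # the "xml" features require lxml

xml = """<url>
  <loc>
    https://example.com/page
  </loc>
</url>"""
url = BeautifulSoup(xml, "xml").find("url")
prop = url.find("loc")
print(repr(prop.text))          # '\n    https://example.com/page\n  '
print(repr(prop.text.strip()))  # 'https://example.com/page'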
@@ -39,4 +39,4 @@ class UnstructuredTSVLoader(UnstructuredFileLoader):
     def _get_elements(self) -> List:
         from unstructured.partition.tsv import partition_tsv

-        return partition_tsv(filename=self.file_path, **self.unstructured_kwargs)  # type: ignore[arg-type]
+        return partition_tsv(filename=self.file_path, **self.unstructured_kwargs)
@@ -37,7 +37,7 @@ class VsdxLoader(BaseLoader, ABC):
         elif not os.path.isfile(self.file_path):
             raise ValueError("File path %s is not a valid file or url" % self.file_path)

-        self.parser = VsdxParser()  # type: ignore[misc]
+        self.parser = VsdxParser()

     def __del__(self) -> None:
         if hasattr(self, "temp_file"):
@@ -50,5 +50,5 @@ class VsdxLoader(BaseLoader, ABC):
         return bool(parsed.netloc) and bool(parsed.scheme)

     def load(self) -> List[Document]:
-        blob = Blob.from_path(self.file_path)  # type: ignore[attr-defined]
+        blob = Blob.from_path(self.file_path)
         return list(self.parser.parse(blob))
@@ -33,7 +33,7 @@ class WeatherDataLoader(BaseLoader):
     def from_params(
         cls, places: Sequence[str], *, openweathermap_api_key: Optional[str] = None
     ) -> WeatherDataLoader:
-        client = OpenWeatherMapAPIWrapper(openweathermap_api_key=openweathermap_api_key)  # type: ignore[call-arg]
+        client = OpenWeatherMapAPIWrapper(openweathermap_api_key=openweathermap_api_key)
         return cls(client, places)

     def lazy_load(
@@ -121,7 +121,7 @@ class UnstructuredWordDocumentLoader(UnstructuredFileLoader):
         try:
             import magic  # noqa: F401

-            is_doc = detect_filetype(self.file_path) == FileType.DOC  # type: ignore[arg-type]
+            is_doc = detect_filetype(self.file_path) == FileType.DOC
         except ImportError:
             _, extension = os.path.splitext(str(self.file_path))
             is_doc = extension == ".doc"
@@ -132,8 +132,8 @@ class UnstructuredWordDocumentLoader(UnstructuredFileLoader):
         if is_doc:
             from unstructured.partition.doc import partition_doc

-            return partition_doc(filename=self.file_path, **self.unstructured_kwargs)  # type: ignore[arg-type]
+            return partition_doc(filename=self.file_path, **self.unstructured_kwargs)
         else:
             from unstructured.partition.docx import partition_docx

-            return partition_docx(filename=self.file_path, **self.unstructured_kwargs)  # type: ignore[arg-type]
+            return partition_docx(filename=self.file_path, **self.unstructured_kwargs)
@@ -46,4 +46,4 @@ class UnstructuredXMLLoader(UnstructuredFileLoader):
     def _get_elements(self) -> List:
         from unstructured.partition.xml import partition_xml

-        return partition_xml(filename=self.file_path, **self.unstructured_kwargs)  # type: ignore[arg-type]
+        return partition_xml(filename=self.file_path, **self.unstructured_kwargs)
@@ -33,7 +33,7 @@ class GoogleTranslateTransformer(BaseDocumentTransformer):
         """
         try:
             from google.api_core.client_options import ClientOptions
-            from google.cloud import translate  # type: ignore[attr-defined]
+            from google.cloud import translate
         except ImportError as exc:
             raise ImportError(
                 "Install Google Cloud Translate to use this parser."
@@ -76,7 +76,7 @@ class GoogleTranslateTransformer(BaseDocumentTransformer):
             Options: `text/plain`, `text/html`
         """
         try:
-            from google.cloud import translate  # type: ignore[attr-defined]
+            from google.cloud import translate
         except ImportError as exc:
             raise ImportError(
                 "Install Google Cloud Translate to use this parser."
@@ -58,7 +58,7 @@ class OpenAIMetadataTagger(BaseDocumentTransformer, BaseModel):
         new_documents = []

         for document in documents:
-            extracted_metadata: Dict = self.tagging_chain.run(document.page_content)  # type: ignore[assignment]
+            extracted_metadata: Dict = self.tagging_chain.run(document.page_content)
             new_document = Document(
                 page_content=document.page_content,
                 metadata={**extracted_metadata, **document.metadata},
@@ -68,7 +68,7 @@ class AnyscaleEmbeddings(OpenAIEmbeddings):
         else:
             values["openai_api_base"] = values["anyscale_api_base"]
             values["openai_api_key"] = values["anyscale_api_key"].get_secret_value()
-            values["client"] = openai.Embedding  # type: ignore[attr-defined]
+            values["client"] = openai.Embedding
         return values

     @property
Some files were not shown because too many files have changed in this diff.