Mirror of https://github.com/hwchase17/langchain.git
Synced 2026-04-19 03:44:40 +00:00

Compare commits: `langchain=...` → `copilot/fi...` (4 commits)

| SHA1 |
|---|
| 80fe6e5d10 |
| cfa5f565ad |
| 2113f725ec |
| d7c770b51b |
.github/copilot-instructions.md (vendored, 2 lines changed)

@@ -25,7 +25,7 @@ def get_user(user_id: str, verbose: bool = False): # Maintains stable interface
 * Prefer descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
 * Break up overly long or deeply nested functions for **readability and maintainability**.
 * Avoid unnecessary abstraction or premature optimization.
-* All generated Python code must include type hints and return types.
+* All generated Python code must include type hints.

 Bad:
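The rule being changed here is easy to see by example. Below is a minimal, hypothetical sketch of what "type hints and return types" asks of generated code; the names are invented for illustration and are not from the repo:

```python
# Bad: cryptic names, no annotations on parameters or the return value.
def get(u, v=False):
    return {"id": u, "verbose": v}


# Good: descriptive names, annotated parameters, and an explicit return type.
def get_user(user_id: str, verbose: bool = False) -> dict[str, str | bool]:
    """Fetch a user record by ID; maintains a stable interface."""
    return {"id": user_id, "verbose": verbose}
```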
.github/scripts/check_diff.py (vendored, 1 line changed)

@@ -16,7 +16,6 @@ LANGCHAIN_DIRS = [
     "libs/core",
     "libs/text-splitters",
     "libs/langchain",
-    "libs/langchain_v1",
 ]

 # when set to True, we are ignoring core dependents
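For context on why this list matters: CI scripts like check_diff.py typically map the files touched by a PR onto the package directories that need testing. A minimal sketch of that idea — the `dirs_to_test` helper below is hypothetical, not the repo's actual implementation:

```python
LANGCHAIN_DIRS = [
    "libs/core",
    "libs/text-splitters",
    "libs/langchain",
]


def dirs_to_test(changed_files: list[str]) -> set[str]:
    """Return the package directories affected by the changed files."""
    return {
        d
        for d in LANGCHAIN_DIRS
        for f in changed_files
        if f.startswith(d + "/")
    }


print(dirs_to_test(["libs/core/langchain_core/messages/ai.py"]))  # {'libs/core'}
```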
.github/workflows/_release.yml (vendored, 2 lines changed)

@@ -340,7 +340,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        partner: [openai, anthropic]
+        partner: [openai]
       fail-fast: false # Continue testing other partners if one fails
     env:
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
SECURITY.md

@@ -66,7 +66,7 @@ All out of scope targets defined by huntr as well as:
   for more details, but generally tools interact with the real world. Developers are
   expected to understand the security implications of their code and are responsible
   for the security of their tools.
-- Code documented with security notices. This will be decided on a case by
+- Code documented with security notices. This will be decided done on a case by
   case basis, but likely will not be eligible for a bounty as the code is already
   documented with guidelines for developers that should be followed for making their
   application secure.
README.md

@@ -20,7 +20,8 @@ LangChain is a framework that consists of a number of packages.

 This package contains base abstractions for different components and ways to compose them together.
 The interfaces for core components like chat models, vector stores, tools and more are defined here.
-**No third-party integrations are defined here.** The dependencies are kept purposefully very lightweight.
+No third-party integrations are defined here.
+The dependencies are very lightweight.

 ## langchain
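To make "base abstractions with lightweight dependencies" concrete: the component interfaces mentioned above are importable from `langchain-core` alone, with no provider packages installed. A small sketch — the `add` tool is an invented example, not from the README:

```python
# Requires only the langchain-core package.
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.tools import tool


@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


messages = [HumanMessage(content="What is 2 + 3?"), AIMessage(content="5")]
print(add.invoke({"a": 2, "b": 3}))  # 5
```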
docs/docs/how_to/chat_models_universal_init.ipynb

@@ -25,7 +25,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"%pip install -qU langchain langchain-openai langchain-anthropic langchain-google-genai"
+"%pip install -qU langchain>=0.2.8 langchain-openai langchain-anthropic langchain-google-vertexai"
 ]
 },
 {
@@ -38,7 +38,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 5,
+"execution_count": 2,
 "id": "79e14913-803c-4382-9009-5c6af3d75d35",
 "metadata": {
 "execution": {
@@ -49,15 +49,38 @@
 }
 },
 "outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_95293/571506279.py:4: LangChainBetaWarning: The function `init_chat_model` is in beta. It is actively being worked on, so the API may change.\n",
+"  gpt_4o = init_chat_model(\"gpt-4o\", model_provider=\"openai\", temperature=0)\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"GPT-4o: I'm an AI created by OpenAI, and I don't have a personal name. How can I assist you today?\n",
+"\n"
+]
+},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"GPT-4o: I’m called ChatGPT. How can I assist you today?\n",
-"\n",
-"Claude Opus: My name is Claude. It's nice to meet you!\n",
-"\n"
-]
-},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Gemini 1.5: I am a large language model, trained by Google. \n",
+"\n",
+"I don't have a name like a person does. You can call me Bard if you like! 😊 \n",
+"\n",
-"Gemini 2.5: I do not have a name. I am a large language model, trained by Google.\n",
+"\n"
+]
+}
@@ -65,10 +88,6 @@
 "source": [
 "from langchain.chat_models import init_chat_model\n",
 "\n",
-"# Don't forget to set your environment variables for the API keys of the respective providers!\n",
-"# For example, you can set them in your terminal or in a .env file:\n",
-"# export OPENAI_API_KEY=\"your_openai_api_key\"\n",
-"\n",
 "# Returns a langchain_openai.ChatOpenAI instance.\n",
 "gpt_4o = init_chat_model(\"gpt-4o\", model_provider=\"openai\", temperature=0)\n",
 "# Returns a langchain_anthropic.ChatAnthropic instance.\n",
@@ -77,13 +96,13 @@
 ")\n",
 "# Returns a langchain_google_vertexai.ChatVertexAI instance.\n",
 "gemini_15 = init_chat_model(\n",
-"    \"gemini-2.5-pro\", model_provider=\"google_genai\", temperature=0\n",
+"    \"gemini-1.5-pro\", model_provider=\"google_vertexai\", temperature=0\n",
 ")\n",
 "\n",
 "# Since all model integrations implement the ChatModel interface, you can use them in the same way.\n",
 "print(\"GPT-4o: \" + gpt_4o.invoke(\"what's your name\").content + \"\\n\")\n",
 "print(\"Claude Opus: \" + claude_opus.invoke(\"what's your name\").content + \"\\n\")\n",
-"print(\"Gemini 2.5: \" + gemini_15.invoke(\"what's your name\").content + \"\\n\")"
+"print(\"Gemini 1.5: \" + gemini_15.invoke(\"what's your name\").content + \"\\n\")"
 ]
 },
 {
@@ -98,7 +117,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 3,
 "id": "0378ccc6-95bc-4d50-be50-fccc193f0a71",
 "metadata": {
 "execution": {
@@ -112,7 +131,7 @@
 "source": [
 "gpt_4o = init_chat_model(\"gpt-4o\", temperature=0)\n",
 "claude_opus = init_chat_model(\"claude-3-opus-20240229\", temperature=0)\n",
-"gemini_15 = init_chat_model(\"gemini-2.5-pro\", temperature=0)"
+"gemini_15 = init_chat_model(\"gemini-1.5-pro\", temperature=0)"
 ]
 },
 {
@@ -127,7 +146,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 7,
+"execution_count": 4,
 "id": "6c037f27-12d7-4e83-811e-4245c0e3ba58",
 "metadata": {
 "execution": {
@@ -141,10 +160,10 @@
 {
 "data": {
 "text/plain": [
-"AIMessage(content='I’m called ChatGPT. How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 11, 'total_tokens': 24, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_07871e2ad8', 'id': 'chatcmpl-BwCyyBpMqn96KED6zPhLm4k9SQMiQ', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--fada10c3-4128-406c-b83d-a850d16b365f-0', usage_metadata={'input_tokens': 11, 'output_tokens': 13, 'total_tokens': 24, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})"
+"AIMessage(content=\"I'm an AI created by OpenAI, and I don't have a personal name. How can I assist you today?\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 11, 'total_tokens': 34}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_25624ae3a5', 'finish_reason': 'stop', 'logprobs': None}, id='run-b41df187-4627-490d-af3c-1c96282d3eb0-0', usage_metadata={'input_tokens': 11, 'output_tokens': 23, 'total_tokens': 34})"
 ]
 },
-"execution_count": 7,
+"execution_count": 4,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -159,7 +178,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 8,
+"execution_count": 5,
 "id": "321e3036-abd2-4e1f-bcc6-606efd036954",
 "metadata": {
 "execution": {
@@ -173,10 +192,10 @@
 {
 "data": {
 "text/plain": [
-"AIMessage(content=\"My name is Claude. It's nice to meet you!\", additional_kwargs={}, response_metadata={'id': 'msg_01VDGrG9D6yefanbBG9zPJrc', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 11, 'output_tokens': 15, 'server_tool_use': None, 'service_tier': 'standard'}, 'model_name': 'claude-3-5-sonnet-20240620'}, id='run--f0156087-debf-4b4b-9aaa-f3328a81ef92-0', usage_metadata={'input_tokens': 11, 'output_tokens': 15, 'total_tokens': 26, 'input_token_details': {'cache_read': 0, 'cache_creation': 0}})"
+"AIMessage(content=\"My name is Claude. It's nice to meet you!\", additional_kwargs={}, response_metadata={'id': 'msg_01Fx9P74A7syoFkwE73CdMMY', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 11, 'output_tokens': 15}}, id='run-a0fd2bbd-3b7e-46bf-8d69-a48c7e60b03c-0', usage_metadata={'input_tokens': 11, 'output_tokens': 15, 'total_tokens': 26})"
 ]
 },
-"execution_count": 8,
+"execution_count": 5,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -375,9 +394,9 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "langchain",
+"display_name": "poetry-venv-2",
 "language": "python",
-"name": "python3"
+"name": "poetry-venv-2"
 },
 "language_info": {
 "codemirror_mode": {
@@ -389,7 +408,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.16"
+"version": "3.11.9"
 }
 },
 "nbformat": 4,
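Both sides of this notebook diff exercise the same `init_chat_model` API. A condensed, runnable sketch of the pattern, assuming the relevant provider packages are installed and their API keys are exported:

```python
from langchain.chat_models import init_chat_model

# Explicit provider: returns a langchain_openai.ChatOpenAI instance.
gpt_4o = init_chat_model("gpt-4o", model_provider="openai", temperature=0)

# The provider can also be inferred from well-known model-name prefixes.
claude = init_chat_model("claude-3-opus-20240229", temperature=0)

# Every result implements the same chat-model interface.
question = "what's your name"
for label, model in [("GPT-4o", gpt_4o), ("Claude", claude)]:
    print(label + ": " + model.invoke(question).content)
```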
docs/docs/how_to/index.mdx

@@ -34,8 +34,6 @@ These are the core building blocks you can use when building applications.
 [Chat Models](/docs/concepts/chat_models) are newer forms of language models that take messages in and output a message.
 See [supported integrations](/docs/integrations/chat/) for details on getting started with chat models from a specific provider.

-- [How to: init any model in one line](/docs/how_to/chat_models_universal_init/)
-- [How to: work with local models](/docs/how_to/local_llms)
 - [How to: do function/tool calling](/docs/how_to/tool_calling)
 - [How to: get models to return structured output](/docs/how_to/structured_output)
 - [How to: cache model responses](/docs/how_to/chat_model_caching)
@@ -50,6 +48,8 @@ See [supported integrations](/docs/integrations/chat/) for details on getting st
 - [How to: few shot prompt tool behavior](/docs/how_to/tools_few_shot)
 - [How to: bind model-specific formatted tools](/docs/how_to/tools_model_specific)
 - [How to: force a specific tool call](/docs/how_to/tool_choice)
+- [How to: work with local models](/docs/how_to/local_llms)
+- [How to: init any model in one line](/docs/how_to/chat_models_universal_init/)
 - [How to: pass multimodal data directly to models](/docs/how_to/multimodal_inputs/)

 ### Messages
docs/docs/how_to/local_llms.ipynb

@@ -13,15 +13,15 @@
 "\n",
 "This has at least two important benefits:\n",
 "\n",
-"1. **Privacy**: Your data is not sent to a third party, and it is not subject to the terms of service of a commercial service\n",
-"2. **Cost**: There is no inference fee, which is important for token-intensive applications (e.g., [long-running simulations](https://twitter.com/RLanceMartin/status/1691097659262820352?s=20), summarization)\n",
+"1. `Privacy`: Your data is not sent to a third party, and it is not subject to the terms of service of a commercial service\n",
+"2. `Cost`: There is no inference fee, which is important for token-intensive applications (e.g., [long-running simulations](https://twitter.com/RLanceMartin/status/1691097659262820352?s=20), summarization)\n",
 "\n",
 "## Overview\n",
 "\n",
 "Running an LLM locally requires a few things:\n",
 "\n",
-"1. **Open-source LLM**: An open-source LLM that can be freely modified and shared \n",
-"2. **Inference**: Ability to run this LLM on your device w/ acceptable latency\n",
+"1. `Open-source LLM`: An open-source LLM that can be freely modified and shared \n",
+"2. `Inference`: Ability to run this LLM on your device w/ acceptable latency\n",
 "\n",
 "### Open-source LLMs\n",
 "\n",
@@ -29,8 +29,8 @@
 "\n",
 "These LLMs can be assessed across at least two dimensions (see figure):\n",
 " \n",
-"1. **Base model**: What is the base-model and how was it trained?\n",
-"2. **Fine-tuning approach**: Was the base-model fine-tuned and, if so, what [set of instructions](https://cameronrwolfe.substack.com/p/beyond-llama-the-power-of-open-llms#%C2%A7alpaca-an-instruction-following-llama-model) was used?\n",
+"1. `Base model`: What is the base-model and how was it trained?\n",
+"2. `Fine-tuning approach`: Was the base-model fine-tuned and, if so, what [set of instructions](https://cameronrwolfe.substack.com/p/beyond-llama-the-power-of-open-llms#%C2%A7alpaca-an-instruction-following-llama-model) was used?\n",
 "\n",
 "\n",
 "\n",
@@ -51,8 +51,8 @@
 "\n",
 "In general, these frameworks will do a few things:\n",
 "\n",
-"1. **Quantization**: Reduce the memory footprint of the raw model weights\n",
-"2. **Efficient implementation for inference**: Support inference on consumer hardware (e.g., CPU or laptop GPU)\n",
+"1. `Quantization`: Reduce the memory footprint of the raw model weights\n",
+"2. `Efficient implementation for inference`: Support inference on consumer hardware (e.g., CPU or laptop GPU)\n",
 "\n",
 "In particular, see [this excellent post](https://finbarr.ca/how-is-llama-cpp-possible/) on the importance of quantization.\n",
 "\n",
@@ -679,17 +679,11 @@
 "\n",
 "In general, use cases for local LLMs can be driven by at least two factors:\n",
 "\n",
-"* **Privacy**: private data (e.g., journals, etc) that a user does not want to share \n",
-"* **Cost**: text preprocessing (extraction/tagging), summarization, and agent simulations are token-use-intensive tasks\n",
+"* `Privacy`: private data (e.g., journals, etc) that a user does not want to share \n",
+"* `Cost`: text preprocessing (extraction/tagging), summarization, and agent simulations are token-use-intensive tasks\n",
 "\n",
 "In addition, [here](https://blog.langchain.dev/using-langsmith-to-support-fine-tuning-of-open-source-llms/) is an overview on fine-tuning, which can utilize open-source LLMs."
 ]
 },
-{
-"cell_type": "markdown",
-"id": "14c2c170",
-"metadata": {},
-"source": []
-}
 ],
 "metadata": {
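To ground the privacy and cost points above in code: one common path is an Ollama server running on the local machine. A minimal sketch, assuming `langchain-ollama` is installed, `ollama serve` is running, and a model such as `llama3` (an example name) has been pulled:

```python
from langchain_ollama import ChatOllama

# All inference happens on the local machine: no per-token fee,
# and the prompt never leaves the device.
llm = ChatOllama(model="llama3", temperature=0)

print(llm.invoke("In one sentence, why do local LLMs help with privacy?").content)
```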
docs/docs/integrations/chat/bedrock.ipynb

@@ -51,31 +51,7 @@
 "\n",
 "### Credentials\n",
 "\n",
-"Head to the [AWS docs](https://docs.aws.amazon.com/bedrock/latest/userguide/setting-up.html) to sign up to AWS and setup your credentials.\n",
-"\n",
-"Alternatively, `ChatBedrockConverse` will read from the following environment variables by default:"
 ]
 },
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "0f65be92",
-"metadata": {},
-"outputs": [],
-"source": [
-"# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"...\"\n",
-"# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"...\"\n",
-"\n",
-"# Not required unless using temporary credentials.\n",
-"# os.environ[\"AWS_SESSION_TOKEN\"] = \"...\""
-]
-},
 {
 "cell_type": "markdown",
 "id": "3baad5a9",
 "metadata": {},
 "source": [
-"You'll also need to turn on model access for your account, which you can do by following [these instructions](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html)."
+"Head to the [AWS docs](https://docs.aws.amazon.com/bedrock/latest/userguide/setting-up.html) to sign up to AWS and setup your credentials. You'll also need to turn on model access for your account, which you can do by following [these instructions](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html)."
 ]
 },
 {
@@ -138,10 +114,6 @@
 "\n",
 "llm = ChatBedrockConverse(\n",
 "    model_id=\"anthropic.claude-3-5-sonnet-20240620-v1:0\",\n",
-"    # region_name=...,\n",
-"    # aws_access_key_id=...,\n",
-"    # aws_secret_access_key=...,\n",
-"    # aws_session_token=...,\n",
 "    # temperature=...,\n",
 "    # max_tokens=...,\n",
 "    # other params...\n",
@@ -265,157 +237,6 @@
 "    print(chunk.text(), end=\"|\")"
 ]
 },
-{
-"cell_type": "markdown",
-"id": "a009400a",
-"metadata": {},
-"source": [
-"## Extended Thinking \n",
-"\n",
-"This guide focuses on implementing Extended Thinking using AWS Bedrock with LangChain's `ChatBedrockConverse` integration.\n",
-"\n",
-"### Supported Models\n",
-"\n",
-"Extended Thinking is available for the following Claude models on AWS Bedrock:\n",
-"\n",
-"| Model | Model ID |\n",
-"|-------|----------|\n",
-"| **Claude Opus 4** | `anthropic.claude-opus-4-20250514-v1:0` |\n",
-"| **Claude Sonnet 4** | `anthropic.claude-sonnet-4-20250514-v1:0` |\n",
-"| **Claude 3.7 Sonnet** | `us.anthropic.claude-3-7-sonnet-20250219-v1:0` |\n"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "abc790ca",
-"metadata": {},
-"outputs": [
-{
-"data": {
-"text/plain": [
-"AIMessage(content=[{'type': 'reasoning_content', 'reasoning_content': {'text': 'The user wants me to translate \"I love programming\" from English to French.\\n\\n\"I love\" translates to \"J\\'aime\" or \"J\\'adore\" in French\\n\"Programming\" translates to \"la programmation\" in French\\n\\nSo the translation would be \"J\\'aime la programmation\" or \"J\\'adore la programmation\"\\n\\nBoth are correct, but \"J\\'aime\" is more commonly used for expressing love/liking something.', 'signature': 'EpgECkgIBRABGAIqQDub6nRpiusjbxZONXVlGXg5ZjUY1Eka1Yp4oBBHmRqGjId+StTBPuwD3CXLyb2rUDRhSc3hTpTM4krVqlFZrIsSDI/WLa1mu38DDqt1HRoMUjm+jF+03MZFD+WQIjBZtHaYiqgY0JQgU0NdXDwwBSZX44gXwuX9EDekh12VM1ysq+WxVtkp0WMU0dKCJo4q/QKpguFFlZtEZjF9PftzOgTIyy+1H5pY+Dsb2pnrGtfAgwTR7PuZ/d8ibY0A8ywjVEZtGm+PtcnCJiK53BWxhGYOtxnfN/RRKtuZhvPQj+QQOWeRWqH+GcbeISCgyTYn5WG75fmVL707byjQZ3IuhMfyZWmiTFE2fc4Jn/bxX7OsU+DbTWv2K1a+g7eW+dvQwYzCBO1hfEn4699/CHII8UAcHh1L3bnxOWGKkeVQ0KMfgfwVb0vuGG4QBYKIDs87QL414i69D68DxqCTZAHK4lMA6Xs7zW+m0MMCct4iHRnJI8kat1mlBEpMz6NRo9KacZJXpLJxofIU4ho7R5/QHccdni0IidNkUtrLBSB3toNJoQEcStts2UR67NHTxn47zk1/hi4v4Ahtw9OEQFONaH6XaG1wjpqEdjQ8/Tmg9eB6ZLoQ4sQfhcMF8Uo3hHbBY8jA3jZ+9pa9VbuVbO6Eup8NX3XXZm2nk50OMWX7hBwgBmlZbEew6pWFu7+13EkYAQ=='}}, {'type': 'text', 'text': \"J'aime la programmation.\"}], additional_kwargs={}, response_metadata={'ResponseMetadata': {'RequestId': '169ca92f-19c9-480c-9fc3-4e5284507e67', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Tue, 22 Jul 2025 04:40:22 GMT', 'content-type': 'application/json', 'content-length': '1498', 'connection': 'keep-alive', 'x-amzn-requestid': '169ca92f-19c9-480c-9fc3-4e5284507e67'}, 'RetryAttempts': 0}, 'stopReason': 'end_turn', 'metrics': {'latencyMs': [2839]}, 'model_name': 'us.anthropic.claude-sonnet-4-20250514-v1:0'}, id='run--42e05e5d-ba86-4dce-9e29-2a4ba32c5804-0', usage_metadata={'input_tokens': 58, 'output_tokens': 122, 'total_tokens': 180, 'input_token_details': {'cache_creation': 0, 'cache_read': 0}})"
-]
-},
-"execution_count": 3,
-"metadata": {},
-"output_type": "execute_result"
-}
-],
-"source": [
-"from langchain_aws import ChatBedrockConverse\n",
-"\n",
-"llm = ChatBedrockConverse(\n",
-"    model_id=\"us.anthropic.claude-sonnet-4-20250514-v1:0\",\n",
-"    region_name=\"us-west-2\",\n",
-"    max_tokens=4096,\n",
-"    additional_model_request_fields={\n",
-"        \"thinking\": {\"type\": \"enabled\", \"budget_tokens\": 1024},\n",
-"    },\n",
-")\n",
-"\n",
-"ai_msg = llm.invoke(messages)\n",
-"ai_msg"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 4,
-"id": "7fb27b941602401d91542211134fc71a",
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"[{'type': 'reasoning_content', 'reasoning_content': {'text': 'The user wants me to translate \"I love programming\" from English to French.\\n\\n\"I love\" translates to \"J\\'aime\" or \"J\\'adore\" in French\\n\"Programming\" translates to \"la programmation\" in French\\n\\nSo the translation would be \"J\\'aime la programmation\" or \"J\\'adore la programmation\"\\n\\nBoth are correct, but \"J\\'aime\" is more commonly used for expressing love/liking something.', 'signature': 'EpgECkgIBRABGAIqQDub6nRpiusjbxZONXVlGXg5ZjUY1Eka1Yp4oBBHmRqGjId+StTBPuwD3CXLyb2rUDRhSc3hTpTM4krVqlFZrIsSDI/WLa1mu38DDqt1HRoMUjm+jF+03MZFD+WQIjBZtHaYiqgY0JQgU0NdXDwwBSZX44gXwuX9EDekh12VM1ysq+WxVtkp0WMU0dKCJo4q/QKpguFFlZtEZjF9PftzOgTIyy+1H5pY+Dsb2pnrGtfAgwTR7PuZ/d8ibY0A8ywjVEZtGm+PtcnCJiK53BWxhGYOtxnfN/RRKtuZhvPQj+QQOWeRWqH+GcbeISCgyTYn5WG75fmVL707byjQZ3IuhMfyZWmiTFE2fc4Jn/bxX7OsU+DbTWv2K1a+g7eW+dvQwYzCBO1hfEn4699/CHII8UAcHh1L3bnxOWGKkeVQ0KMfgfwVb0vuGG4QBYKIDs87QL414i69D68DxqCTZAHK4lMA6Xs7zW+m0MMCct4iHRnJI8kat1mlBEpMz6NRo9KacZJXpLJxofIU4ho7R5/QHccdni0IidNkUtrLBSB3toNJoQEcStts2UR67NHTxn47zk1/hi4v4Ahtw9OEQFONaH6XaG1wjpqEdjQ8/Tmg9eB6ZLoQ4sQfhcMF8Uo3hHbBY8jA3jZ+9pa9VbuVbO6Eup8NX3XXZm2nk50OMWX7hBwgBmlZbEew6pWFu7+13EkYAQ=='}}, {'type': 'text', 'text': \"J'aime la programmation.\"}]\n"
-]
-}
-],
-"source": [
-"print(ai_msg.content)"
-]
-},
-{
-"cell_type": "markdown",
-"id": "f1eb1ce1",
-"metadata": {},
-"source": [
-"### How extended thinking works\n",
-"\n",
-"When extended thinking is turned on, Claude creates thinking content blocks where it outputs its internal reasoning. Claude incorporates insights from this reasoning before crafting a final response. The API response will include thinking content blocks, followed by text content blocks."
-]
-},
-{
-"cell_type": "code",
-"execution_count": 5,
-"id": "951d8206",
-"metadata": {},
-"outputs": [
-{
-"data": {
-"text/plain": [
-"[('system',\n",
-" 'You are a helpful assistant that translates English to French. Translate the user sentence.'),\n",
-" ('human', 'I love programming.'),\n",
-" ('ai',\n",
-" [{'type': 'reasoning_content',\n",
-" 'reasoning_content': {'text': 'The user wants me to translate \"I love programming\" from English to French.\\n\\n\"I love\" translates to \"J\\'aime\" or \"J\\'adore\" in French\\n\"Programming\" translates to \"la programmation\" in French\\n\\nSo the translation would be \"J\\'aime la programmation\" or \"J\\'adore la programmation\"\\n\\nBoth are correct, but \"J\\'aime\" is more commonly used for expressing love/liking something.',\n",
-" 'signature': 'EpgECkgIBRABGAIqQDub6nRpiusjbxZONXVlGXg5ZjUY1Eka1Yp4oBBHmRqGjId+StTBPuwD3CXLyb2rUDRhSc3hTpTM4krVqlFZrIsSDI/WLa1mu38DDqt1HRoMUjm+jF+03MZFD+WQIjBZtHaYiqgY0JQgU0NdXDwwBSZX44gXwuX9EDekh12VM1ysq+WxVtkp0WMU0dKCJo4q/QKpguFFlZtEZjF9PftzOgTIyy+1H5pY+Dsb2pnrGtfAgwTR7PuZ/d8ibY0A8ywjVEZtGm+PtcnCJiK53BWxhGYOtxnfN/RRKtuZhvPQj+QQOWeRWqH+GcbeISCgyTYn5WG75fmVL707byjQZ3IuhMfyZWmiTFE2fc4Jn/bxX7OsU+DbTWv2K1a+g7eW+dvQwYzCBO1hfEn4699/CHII8UAcHh1L3bnxOWGKkeVQ0KMfgfwVb0vuGG4QBYKIDs87QL414i69D68DxqCTZAHK4lMA6Xs7zW+m0MMCct4iHRnJI8kat1mlBEpMz6NRo9KacZJXpLJxofIU4ho7R5/QHccdni0IidNkUtrLBSB3toNJoQEcStts2UR67NHTxn47zk1/hi4v4Ahtw9OEQFONaH6XaG1wjpqEdjQ8/Tmg9eB6ZLoQ4sQfhcMF8Uo3hHbBY8jA3jZ+9pa9VbuVbO6Eup8NX3XXZm2nk50OMWX7hBwgBmlZbEew6pWFu7+13EkYAQ=='}},\n",
-" {'type': 'text', 'text': \"J'aime la programmation.\"}]),\n",
-" ('human', 'I love AI')]"
-]
-},
-"execution_count": 5,
-"metadata": {},
-"output_type": "execute_result"
-}
-],
-"source": [
-"next_messages = messages + [(\"ai\", ai_msg.content), (\"human\", \"I love AI\")]\n",
-"next_messages"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 6,
-"id": "9d8c506c",
-"metadata": {},
-"outputs": [
-{
-"data": {
-"text/plain": [
-"AIMessage(content=[{'type': 'reasoning_content', 'reasoning_content': {'text': 'The user wants me to translate \"I love AI\" from English to French. \\n\\n\"I love\" translates to \"J\\'aime\" in French.\\n\"AI\" stands for \"Artificial Intelligence\" which in French is \"Intelligence Artificielle\" or abbreviated as \"IA\".\\n\\nSo the translation would be \"J\\'aime l\\'IA\" (using the abbreviation) or \"J\\'aime l\\'intelligence artificielle\" (using the full term).\\n\\nI think using the abbreviation \"IA\" would be more natural and commonly used, similar to how we use \"AI\" in English.', 'signature': 'EoMFCkgIBRABGAIqQOwp9d0YWm8NctfL9lf1MeWR1OxeAKB3Es19Lei2bdHQ4W0ezTK4wVcm/VLM+7kICX2aB9RAmUD5sJxoKHfdX38SDIR/aSJhHZifGOHqwBoMhzNsyPmB7FFNvNESIjBMVRpRUDTFGn5+nL0x5CjWhKA8H/XFnKYRrUyMYb1n7lCQA7BeEjsaWwxZ3YV9rZsq6APuaXaA40Bt+KnpPOo06r72L/DceliRAw1a6cuT5E0Dv0eIAOYblbXaKYn0jy8UzTUuctOP3As/zT5pK5yC+Rx0d2l9kuP3+COERM98u0R04bWn6qh0HcyE+zNc7c4YWkncjdmOxF/j6OxhcMhZEoX2035v9eUJ9+O/u1xaff08YAEfg7TGWrSIwalpjs1mzWA9ijKg8YyjmXjWnMeFn0z6LDqLaaKc+nC8IN9SLwA/eHpf/ayoEgmogn7gWzijW8MDbnlwpQDS75wK7An3RMEcpWD/OXrKb1EhWKEmOBro5BOTGsfK3ZDveRL0aCBINdOu+AHMQDFXJ04cRDEjs9GE3YC218UcFtS42TFO7/Ct5CYCTknETPx93zcGTOM2VPOZ02Uem1A7Nda/Fa4l2b03EUEtwlgske5K1RbeohN9sclxYsxX5nGJ5sSZurVCk9plkyTG3aiPvbohfVVarVgukKoKwoMDYz5rHVscWlUe+qeqJE/H+KKlhtzO+lWWDN4knqeYsZ55flO5Hq4vT20QCYnF8hcUx07ngGKXuGID9n5kFnLsP8sBUHYKm7bmopFFZvfPcmsqiV9yvG/8Ly9DHbmY5ZwxyrbdJCFT6HD6kq/mEBDftZ6dhmyKMimJBfbTj7d3VAILbRgB'}}, {'type': 'text', 'text': \"J'aime l'IA.\"}], additional_kwargs={}, response_metadata={'ResponseMetadata': {'RequestId': '023799d6-7ed5-4e49-8ad7-7460a49a9a45', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Tue, 22 Jul 2025 04:40:34 GMT', 'content-type': 'application/json', 'content-length': '1737', 'connection': 'keep-alive', 'x-amzn-requestid': '023799d6-7ed5-4e49-8ad7-7460a49a9a45'}, 'RetryAttempts': 0}, 'stopReason': 'end_turn', 'metrics': {'latencyMs': [3473]}, 'model_name': 'us.anthropic.claude-sonnet-4-20250514-v1:0'}, id='run--ca8abc92-60a9-4bd1-93b4-7788496eda7a-0', usage_metadata={'input_tokens': 75, 'output_tokens': 153, 'total_tokens': 228, 'input_token_details': {'cache_creation': 0, 'cache_read': 0}})"
-]
-},
-"execution_count": 6,
-"metadata": {},
-"output_type": "execute_result"
-}
-],
-"source": [
-"ai_msg = llm.invoke(next_messages)\n",
-"ai_msg"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 7,
-"id": "e53e3ebb",
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"[{'type': 'reasoning_content', 'reasoning_content': {'text': 'The user wants me to translate \"I love AI\" from English to French. \\n\\n\"I love\" translates to \"J\\'aime\" in French.\\n\"AI\" stands for \"Artificial Intelligence\" which in French is \"Intelligence Artificielle\" or abbreviated as \"IA\".\\n\\nSo the translation would be \"J\\'aime l\\'IA\" (using the abbreviation) or \"J\\'aime l\\'intelligence artificielle\" (using the full term).\\n\\nI think using the abbreviation \"IA\" would be more natural and commonly used, similar to how we use \"AI\" in English.', 'signature': 'EoMFCkgIBRABGAIqQOwp9d0YWm8NctfL9lf1MeWR1OxeAKB3Es19Lei2bdHQ4W0ezTK4wVcm/VLM+7kICX2aB9RAmUD5sJxoKHfdX38SDIR/aSJhHZifGOHqwBoMhzNsyPmB7FFNvNESIjBMVRpRUDTFGn5+nL0x5CjWhKA8H/XFnKYRrUyMYb1n7lCQA7BeEjsaWwxZ3YV9rZsq6APuaXaA40Bt+KnpPOo06r72L/DceliRAw1a6cuT5E0Dv0eIAOYblbXaKYn0jy8UzTUuctOP3As/zT5pK5yC+Rx0d2l9kuP3+COERM98u0R04bWn6qh0HcyE+zNc7c4YWkncjdmOxF/j6OxhcMhZEoX2035v9eUJ9+O/u1xaff08YAEfg7TGWrSIwalpjs1mzWA9ijKg8YyjmXjWnMeFn0z6LDqLaaKc+nC8IN9SLwA/eHpf/ayoEgmogn7gWzijW8MDbnlwpQDS75wK7An3RMEcpWD/OXrKb1EhWKEmOBro5BOTGsfK3ZDveRL0aCBINdOu+AHMQDFXJ04cRDEjs9GE3YC218UcFtS42TFO7/Ct5CYCTknETPx93zcGTOM2VPOZ02Uem1A7Nda/Fa4l2b03EUEtwlgske5K1RbeohN9sclxYsxX5nGJ5sSZurVCk9plkyTG3aiPvbohfVVarVgukKoKwoMDYz5rHVscWlUe+qeqJE/H+KKlhtzO+lWWDN4knqeYsZ55flO5Hq4vT20QCYnF8hcUx07ngGKXuGID9n5kFnLsP8sBUHYKm7bmopFFZvfPcmsqiV9yvG/8Ly9DHbmY5ZwxyrbdJCFT6HD6kq/mEBDftZ6dhmyKMimJBfbTj7d3VAILbRgB'}}, {'type': 'text', 'text': \"J'aime l'IA.\"}]\n"
-]
-}
-],
-"source": [
-"print(ai_msg.content)"
-]
-},
 {
 "cell_type": "markdown",
 "id": "a77519e5-897d-41a0-a9bb-55300fa79efc",
@@ -558,7 +379,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": ".venv",
+"display_name": "Python 3 (ipykernel)",
 "language": "python",
 "name": "python3"
 },
@@ -572,7 +393,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.12.9"
+"version": "3.10.4"
 }
 },
 "nbformat": 4,
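The Extended Thinking cells removed above show `content` arriving as a list of typed blocks ('reasoning_content' followed by 'text'). A small sketch of splitting the reasoning from the final answer, based only on the block shapes visible in those outputs; the literal list below is a stand-in for a real `llm.invoke(...).content`:

```python
# Stand-in for ai_msg.content from the outputs above (values abbreviated).
ai_content = [
    {
        "type": "reasoning_content",
        "reasoning_content": {"text": "The user wants me to translate ...", "signature": "..."},
    },
    {"type": "text", "text": "J'aime la programmation."},
]

reasoning = [
    block["reasoning_content"]["text"]
    for block in ai_content
    if block["type"] == "reasoning_content"
]
answer = "".join(block["text"] for block in ai_content if block["type"] == "text")

print("Reasoning:", reasoning[0])
print("Answer:", answer)
```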
docs/docs/integrations/text_embedding/cohere.ipynb

@@ -1,267 +1,265 @@
 {
 "cells": [
 {
 "cell_type": "raw",
 "id": "afaf8039",
 "metadata": {},
 "source": [
 "---\n",
 "sidebar_label: Cohere\n",
 "---"
 ]
 },
 {
 "cell_type": "markdown",
 "id": "9a3d6f34",
 "metadata": {},
 "source": [
 "# CohereEmbeddings\n",
 "\n",
 "This will help you get started with Cohere embedding models using LangChain. For detailed documentation on `CohereEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/cohere/embeddings/langchain_cohere.embeddings.CohereEmbeddings.html).\n",
 "\n",
 "## Overview\n",
 "### Integration details\n",
 "\n",
 "import { ItemTable } from \"@theme/FeatureTables\";\n",
 "\n",
 "<ItemTable category=\"text_embedding\" item=\"Cohere\" />\n",
 "\n",
 "## Setup\n",
 "\n",
 "To access Cohere embedding models you'll need to create a/an Cohere account, get an API key, and install the `langchain-cohere` integration package.\n",
 "\n",
 "### Credentials\n",
 "\n",
 "\n",
 "Head to [cohere.com](https://cohere.com) to sign up to Cohere and generate an API key. Once you’ve done this set the COHERE_API_KEY environment variable:"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 8,
 "id": "36521c2a",
 "metadata": {},
 "outputs": [],
 "source": [
 "import getpass\n",
 "import os\n",
 "\n",
 "if not os.getenv(\"COHERE_API_KEY\"):\n",
 "    os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Enter your Cohere API key: \")"
 ]
 },
 {
 "cell_type": "markdown",
 "id": "c84fb993",
 "metadata": {},
-"source": [
-"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
-]
+"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
 },
 {
 "cell_type": "code",
 "execution_count": 9,
 "id": "39a4953b",
 "metadata": {},
 "outputs": [],
 "source": [
 "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
 "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
 ]
 },
 {
 "cell_type": "markdown",
 "id": "d9664366",
 "metadata": {},
 "source": [
 "### Installation\n",
 "\n",
 "The LangChain Cohere integration lives in the `langchain-cohere` package:"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "64853226",
 "metadata": {},
 "outputs": [],
 "source": [
 "%pip install -qU langchain-cohere"
 ]
 },
 {
 "cell_type": "markdown",
 "id": "45dd1724",
 "metadata": {},
 "source": [
 "## Instantiation\n",
 "\n",
 "Now we can instantiate our model object and generate chat completions:"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 10,
 "id": "9ea7a09b",
 "metadata": {},
 "outputs": [],
 "source": [
 "from langchain_cohere import CohereEmbeddings\n",
 "\n",
 "embeddings = CohereEmbeddings(\n",
 "    model=\"embed-english-v3.0\",\n",
 ")"
 ]
 },
 {
 "cell_type": "markdown",
 "id": "77d271b6",
 "metadata": {},
 "source": [
 "## Indexing and Retrieval\n",
 "\n",
-"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
+"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
 "\n",
 "Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 11,
 "id": "d817716b",
 "metadata": {},
 "outputs": [
 {
 "data": {
 "text/plain": [
 "'LangChain is the framework for building context-aware reasoning applications'"
 ]
 },
 "execution_count": 11,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
 "source": [
 "# Create a vector store with a sample text\n",
 "from langchain_core.vectorstores import InMemoryVectorStore\n",
 "\n",
 "text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
 "\n",
 "vectorstore = InMemoryVectorStore.from_texts(\n",
 "    [text],\n",
 "    embedding=embeddings,\n",
 ")\n",
 "\n",
 "# Use the vectorstore as a retriever\n",
 "retriever = vectorstore.as_retriever()\n",
 "\n",
 "# Retrieve the most similar text\n",
 "retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
 "\n",
 "# show the retrieved document's content\n",
 "retrieved_documents[0].page_content"
 ]
 },
 {
 "cell_type": "markdown",
 "id": "e02b9855",
 "metadata": {},
 "source": [
 "## Direct Usage\n",
 "\n",
 "Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
 "\n",
 "You can directly call these methods to get embeddings for your own use cases.\n",
 "\n",
 "### Embed single texts\n",
 "\n",
 "You can embed single texts or documents with `embed_query`:"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 12,
 "id": "0d2befcd",
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
 "[-0.022979736, -0.030212402, -0.08886719, -0.08569336, 0.007030487, -0.0010671616, -0.033813477, 0.0\n"
 ]
 }
 ],
 "source": [
 "single_vector = embeddings.embed_query(text)\n",
 "print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
 ]
 },
 {
 "cell_type": "markdown",
 "id": "1b5a7d03",
 "metadata": {},
 "source": [
 "### Embed multiple texts\n",
 "\n",
 "You can embed multiple texts with `embed_documents`:"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 13,
 "id": "2f4d6e97",
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
 "[-0.028869629, -0.030410767, -0.099121094, -0.07116699, -0.012748718, -0.0059432983, -0.04360962, 0.\n",
 "[-0.047332764, -0.049957275, -0.07458496, -0.034332275, -0.057922363, -0.0112838745, -0.06994629, 0.\n"
 ]
 }
 ],
 "source": [
 "text2 = (\n",
 "    \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
 ")\n",
 "two_vectors = embeddings.embed_documents([text, text2])\n",
 "for vector in two_vectors:\n",
 "    print(str(vector)[:100]) # Show the first 100 characters of the vector"
 ]
 },
 {
 "cell_type": "markdown",
 "id": "98785c12",
 "metadata": {},
 "source": [
 "## API Reference\n",
 "\n",
 "For detailed documentation on `CohereEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/cohere/embeddings/langchain_cohere.embeddings.CohereEmbeddings.html).\n"
 ]
 }
 ],
 "metadata": {
 "kernelspec": {
 "display_name": "Python 3 (ipykernel)",
 "language": "python",
 "name": "python3"
 },
 "language_info": {
 "codemirror_mode": {
 "name": "ipython",
 "version": 3
 },
 "file_extension": ".py",
 "mimetype": "text/x-python",
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.4"
+"version": "3.9.6"
 }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
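The `embed_query` / `embed_documents` split shown in the Cohere notebook generalizes to any embeddings integration. A compact sketch of the same flow with an explicit similarity check added for illustration; the `cosine` helper is not part of the notebook:

```python
import math

from langchain_cohere import CohereEmbeddings

embeddings = CohereEmbeddings(model="embed-english-v3.0")

query_vector = embeddings.embed_query("What is LangChain?")
doc_vectors = embeddings.embed_documents([
    "LangChain is the framework for building context-aware reasoning applications",
    "LangGraph is a library for building stateful, multi-actor applications with LLMs",
])


def cosine(a: list[float], b: list[float]) -> float:
    """Cosine similarity between two equal-length vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm


# The first document should score closer to the query than the second.
for vector in doc_vectors:
    print(round(cosine(query_vector, vector), 3))
```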
@@ -125,7 +125,7 @@
 "source": [
 "## Indexing and Retrieval\n",
 "\n",
-"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
+"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
 "\n",
 "Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
 ]
@@ -264,7 +264,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.6"
+"version": "3.10.5"
 }
 },
 "nbformat": 4,
@@ -1,267 +1,265 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Fireworks\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# FireworksEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Fireworks embedding models using LangChain. For detailed documentation on `FireworksEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/fireworks/embeddings/langchain_fireworks.embeddings.FireworksEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Fireworks\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Fireworks embedding models you'll need to create a Fireworks account, get an API key, and install the `langchain-fireworks` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [fireworks.ai](https://fireworks.ai/) to sign up to Fireworks and generate an API key. Once you’ve done this set the FIREWORKS_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"FIREWORKS_API_KEY\"):\n",
|
||||
" os.environ[\"FIREWORKS_API_KEY\"] = getpass.getpass(\"Enter your Fireworks API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Fireworks integration lives in the `langchain-fireworks` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-fireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_fireworks import FireworksEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = FireworksEmbeddings(\n",
|
||||
" model=\"nomic-ai/nomic-embed-text-v1.5\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cells": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Fireworks\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.01666259765625, 0.011688232421875, -0.1181640625, -0.10205078125, 0.05438232421875, -0.0890502929\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# FireworksEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Fireworks embedding models using LangChain. For detailed documentation on `FireworksEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/fireworks/embeddings/langchain_fireworks.embeddings.FireworksEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Fireworks\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Fireworks embedding models you'll need to create a Fireworks account, get an API key, and install the `langchain-fireworks` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [fireworks.ai](https://fireworks.ai/) to sign up to Fireworks and generate an API key. Once you’ve done this set the FIREWORKS_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.016632080078125, 0.01165008544921875, -0.1181640625, -0.10186767578125, 0.05438232421875, -0.0890\n",
"[-0.02667236328125, 0.036651611328125, -0.1630859375, -0.0904541015625, -0.022430419921875, -0.09545\n"
]
}
],
"source": [
"text2 = (\n",
"    \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
")\n",
"two_vectors = embeddings.embed_documents([text, text2])\n",
"for vector in two_vectors:\n",
"    print(str(vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "3fba556a-b53d-431c-b0c6-ffb1e2fa5a6e",
"metadata": {},
"source": [
"## API Reference\n",
"\n",
"For detailed documentation of all `FireworksEmbeddings` features and configurations head to the [API reference](https://python.langchain.com/api_reference/fireworks/embeddings/langchain_fireworks.embeddings.FireworksEmbeddings.html)."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
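
A note on the Fireworks cells above: `embed_query` and `embed_documents` return plain lists of floats, so similarity can be sanity-checked without a vector store. The sketch below is illustrative only and not part of the notebook diff; it assumes `FIREWORKS_API_KEY` is set and reuses the model name from the cells above, and the `cosine` helper is hand-rolled here, not a LangChain API.

import math

from langchain_fireworks import FireworksEmbeddings

embeddings = FireworksEmbeddings(model="nomic-ai/nomic-embed-text-v1.5")

def cosine(a: list[float], b: list[float]) -> float:
    """Plain cosine similarity between two equal-length vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    return dot / (norm_a * norm_b)

docs = [
    "LangChain is the framework for building context-aware reasoning applications",
    "LangGraph is a library for building stateful, multi-actor applications with LLMs",
]
doc_vectors = embeddings.embed_documents(docs)  # one vector per document
query_vector = embeddings.embed_query("What is LangChain?")

# The first document should score highest for this query.
for doc, vec in zip(docs, doc_vectors):
    print(f"{cosine(query_vector, vec):.3f}  {doc[:48]}")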

@@ -203,7 +203,7 @@
"source": [
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
@@ -327,7 +327,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "langchain_ibm",
"language": "python",
"name": "python3"
},
@@ -341,9 +341,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
"nbformat_minor": 2
}

@@ -132,7 +132,7 @@
"source": [
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
@@ -286,7 +286,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.10.5"
}
},
"nbformat": 4,

@@ -1,266 +1,264 @@
{
"cells": [
{
"cell_type": "raw",
"id": "afaf8039",
"metadata": {},
"source": [
"---\n",
"sidebar_label: MistralAI\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "9a3d6f34",
"metadata": {},
"source": [
"# MistralAIEmbeddings\n",
"\n",
"This will help you get started with MistralAI embedding models using LangChain. For detailed documentation on `MistralAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/mistralai/embeddings/langchain_mistralai.embeddings.MistralAIEmbeddings.html).\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"import { ItemTable } from \"@theme/FeatureTables\";\n",
"\n",
"<ItemTable category=\"text_embedding\" item=\"MistralAI\" />\n",
"\n",
"## Setup\n",
"\n",
"To access MistralAI embedding models you'll need to create a MistralAI account, get an API key, and install the `langchain-mistralai` integration package.\n",
"\n",
"### Credentials\n",
"\n",
"Head to [https://console.mistral.ai/](https://console.mistral.ai/) to sign up to MistralAI and generate an API key. Once you've done this set the MISTRALAI_API_KEY environment variable:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "36521c2a",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"if not os.getenv(\"MISTRALAI_API_KEY\"):\n",
"    os.environ[\"MISTRALAI_API_KEY\"] = getpass.getpass(\"Enter your MistralAI API key: \")"
]
},
{
"cell_type": "markdown",
"id": "c84fb993",
"metadata": {},
"source": [
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "39a4953b",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
]
},
{
"cell_type": "markdown",
"id": "d9664366",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"The LangChain MistralAI integration lives in the `langchain-mistralai` package:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64853226",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-mistralai"
]
},
{
"cell_type": "markdown",
"id": "45dd1724",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our model object and generate embeddings:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9ea7a09b",
"metadata": {},
"outputs": [],
"source": [
"from langchain_mistralai import MistralAIEmbeddings\n",
"\n",
"embeddings = MistralAIEmbeddings(\n",
"    model=\"mistral-embed\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "77d271b6",
"metadata": {},
"source": [
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d817716b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'LangChain is the framework for building context-aware reasoning applications'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a vector store with a sample text\n",
"from langchain_core.vectorstores import InMemoryVectorStore\n",
"\n",
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
"\n",
"vectorstore = InMemoryVectorStore.from_texts(\n",
"    [text],\n",
"    embedding=embeddings,\n",
")\n",
"\n",
"# Use the vectorstore as a retriever\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
"# Retrieve the most similar text\n",
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
"\n",
"# show the retrieved document's content\n",
"retrieved_documents[0].page_content"
]
},
{
"cell_type": "markdown",
"id": "e02b9855",
"metadata": {},
"source": [
"## Direct Usage\n",
"\n",
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
"\n",
"You can directly call these methods to get embeddings for your own use cases.\n",
"\n",
"### Embed single texts\n",
"\n",
"You can embed single texts or documents with `embed_query`:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0d2befcd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.04443359375, 0.01885986328125, 0.018035888671875, -0.00864410400390625, 0.049652099609375, -0.00\n"
]
}
],
"source": [
"single_vector = embeddings.embed_query(text)\n",
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "1b5a7d03",
"metadata": {},
"source": [
"### Embed multiple texts\n",
"\n",
"You can embed multiple texts with `embed_documents`:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2f4d6e97",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.04443359375, 0.01885986328125, 0.0180511474609375, -0.0086517333984375, 0.049652099609375, -0.00\n",
"[-0.02032470703125, 0.02606201171875, 0.051605224609375, -0.0281982421875, 0.055755615234375, 0.0019\n"
]
}
],
"source": [
"text2 = (\n",
"    \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
")\n",
"two_vectors = embeddings.embed_documents([text, text2])\n",
"for vector in two_vectors:\n",
"    print(str(vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "98785c12",
"metadata": {},
"source": [
"## API Reference\n",
"\n",
"For detailed documentation on `MistralAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/mistralai/embeddings/langchain_mistralai.embeddings.MistralAIEmbeddings.html).\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
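
For readers following the MistralAI notebook above, here is a hedged sketch of the same retrieval flow with more than one indexed text. It assumes `MISTRALAI_API_KEY` is set and uses only `InMemoryVectorStore` and `MistralAIEmbeddings` as shown in the diff; `similarity_search` with `k` is the vector store's documented way to get multiple hits.

from langchain_core.vectorstores import InMemoryVectorStore
from langchain_mistralai import MistralAIEmbeddings

embeddings = MistralAIEmbeddings(model="mistral-embed")

texts = [
    "LangChain is the framework for building context-aware reasoning applications",
    "LangGraph is a library for building stateful, multi-actor applications with LLMs",
]
# Index both texts, then ask for the top-k most similar documents.
vectorstore = InMemoryVectorStore.from_texts(texts, embedding=embeddings)
for doc in vectorstore.similarity_search("stateful multi-actor applications", k=2):
    print(doc.page_content)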

@@ -128,7 +128,7 @@
"source": [
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
@@ -277,7 +277,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.10.16"
}
},
"nbformat": 4,

@@ -112,7 +112,7 @@
"source": [
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
@@ -249,7 +249,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.12.3"
}
},
"nbformat": 4,

@@ -37,7 +37,6 @@
},
{
"cell_type": "code",
"execution_count": 1,
"id": "36521c2a",
"metadata": {
"ExecuteTime": {
@@ -45,14 +44,15 @@
"start_time": "2025-03-20T01:53:27.764291Z"
}
},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"if not os.getenv(\"NETMIND_API_KEY\"):\n",
"    os.environ[\"NETMIND_API_KEY\"] = getpass.getpass(\"Enter your Netmind API key: \")"
]
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "markdown",
@@ -64,7 +64,6 @@
},
{
"cell_type": "code",
"execution_count": 2,
"id": "39a4953b",
"metadata": {
"ExecuteTime": {
@@ -72,11 +71,12 @@
"start_time": "2025-03-20T01:53:32.141858Z"
}
},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
]
],
"outputs": [],
"execution_count": 2
},
{
"cell_type": "markdown",
@@ -90,7 +90,6 @@
},
{
"cell_type": "code",
"execution_count": 3,
"id": "64853226",
"metadata": {
"ExecuteTime": {
@@ -98,21 +97,22 @@
"start_time": "2025-03-20T01:53:36.171640Z"
}
},
"source": [
"%pip install -qU langchain-netmind"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\r\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n",
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m24.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m25.0.1\u001B[0m\r\n",
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install -qU langchain-netmind"
]
"execution_count": 3
},
{
"cell_type": "markdown",
@@ -126,7 +126,6 @@
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9ea7a09b",
"metadata": {
"ExecuteTime": {
@@ -134,14 +133,15 @@
"start_time": "2025-03-20T01:54:30.146876Z"
}
},
"outputs": [],
"source": [
"from langchain_netmind import NetmindEmbeddings\n",
"\n",
"embeddings = NetmindEmbeddings(\n",
"    model=\"nvidia/NV-Embed-v2\",\n",
")"
]
],
"outputs": [],
"execution_count": 4
},
{
"cell_type": "markdown",
@@ -150,14 +150,13 @@
"source": [
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d817716b",
"metadata": {
"ExecuteTime": {
@@ -165,18 +164,6 @@
"start_time": "2025-03-20T01:54:34.500805Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'LangChain is the framework for building context-aware reasoning applications'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a vector store with a sample text\n",
"from langchain_core.vectorstores import InMemoryVectorStore\n",
@@ -196,7 +183,20 @@
"\n",
"# show the retrieved document's content\n",
"retrieved_documents[0].page_content"
]
],
"outputs": [
{
"data": {
"text/plain": [
"'LangChain is the framework for building context-aware reasoning applications'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 5
},
{
"cell_type": "markdown",
@@ -216,7 +216,6 @@
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0d2befcd",
"metadata": {
"ExecuteTime": {
@@ -224,6 +223,10 @@
"start_time": "2025-03-20T01:54:45.196528Z"
}
},
"source": [
"single_vector = embeddings.embed_query(text)\n",
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
],
"outputs": [
{
"name": "stdout",
@@ -233,10 +236,7 @@
]
}
],
"source": [
"single_vector = embeddings.embed_query(text)\n",
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
]
"execution_count": 6
},
{
"cell_type": "markdown",
@@ -250,7 +250,6 @@
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2f4d6e97",
"metadata": {
"ExecuteTime": {
@@ -258,6 +257,14 @@
"start_time": "2025-03-20T01:54:52.468719Z"
}
},
"source": [
"text2 = (\n",
"    \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
")\n",
"two_vectors = embeddings.embed_documents([text, text2])\n",
"for vector in two_vectors:\n",
"    print(str(vector)[:100]) # Show the first 100 characters of the vector"
],
"outputs": [
{
"name": "stdout",
@@ -268,14 +275,7 @@
]
}
],
"source": [
"text2 = (\n",
"    \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
")\n",
"two_vectors = embeddings.embed_documents([text, text2])\n",
"for vector in two_vectors:\n",
"    print(str(vector)[:100]) # Show the first 100 characters of the vector"
]
"execution_count": 7
},
{
"cell_type": "markdown",
@@ -291,12 +291,12 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "adb9e45c34733299",
"metadata": {},
"cell_type": "code",
"outputs": [],
"source": []
"execution_count": null,
"source": "",
"id": "adb9e45c34733299"
}
],
"metadata": {
@@ -315,7 +315,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.10.5"
}
},
"nbformat": 4,

@@ -1,287 +1,285 @@
{
"cells": [
{
"cell_type": "raw",
"id": "afaf8039",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Nomic\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "9a3d6f34",
"metadata": {},
"source": [
"# NomicEmbeddings\n",
"\n",
"This will help you get started with Nomic embedding models using LangChain. For detailed documentation on `NomicEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/nomic/embeddings/langchain_nomic.embeddings.NomicEmbeddings.html).\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"import { ItemTable } from \"@theme/FeatureTables\";\n",
"\n",
"<ItemTable category=\"text_embedding\" item=\"Nomic\" />\n",
"\n",
"## Setup\n",
"\n",
"To access Nomic embedding models you'll need to create a Nomic account, get an API key, and install the `langchain-nomic` integration package.\n",
"\n",
"### Credentials\n",
"\n",
"Head to [https://atlas.nomic.ai/](https://atlas.nomic.ai/) to sign up to Nomic and generate an API key. Once you've done this set the `NOMIC_API_KEY` environment variable:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "36521c2a",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"if not os.getenv(\"NOMIC_API_KEY\"):\n",
"    os.environ[\"NOMIC_API_KEY\"] = getpass.getpass(\"Enter your Nomic API key: \")"
]
},
{
"cell_type": "markdown",
"id": "c84fb993",
"metadata": {},
"source": [
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "39a4953b",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
]
},
{
"cell_type": "markdown",
"id": "d9664366",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"The LangChain Nomic integration lives in the `langchain-nomic` package:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "64853226",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install -qU langchain-nomic"
]
},
{
"cell_type": "markdown",
"id": "45dd1724",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our model object and generate embeddings:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "9ea7a09b",
"metadata": {},
"outputs": [],
"source": [
"from langchain_nomic import NomicEmbeddings\n",
"\n",
"embeddings = NomicEmbeddings(\n",
"    model=\"nomic-embed-text-v1.5\",\n",
"    # dimensionality=256,\n",
"    # Nomic's `nomic-embed-text-v1.5` model was [trained with Matryoshka learning](https://blog.nomic.ai/posts/nomic-embed-matryoshka)\n",
"    # to enable variable-length embeddings with a single model.\n",
"    # This means that you can specify the dimensionality of the embeddings at inference time.\n",
"    # The model supports dimensionality from 64 to 768.\n",
"    # inference_mode=\"remote\",\n",
"    # One of `remote`, `local` (Embed4All), or `dynamic` (automatic). Defaults to `remote`.\n",
"    # api_key=... , # if using remote inference,\n",
"    # device=\"cpu\",\n",
"    # The device to use for local embeddings. Choices include\n",
"    # `cpu`, `gpu`, `nvidia`, `amd`, or a specific device name. See\n",
"    # the docstring for `GPT4All.__init__` for more info. Typically\n",
"    # defaults to CPU. Do not use on macOS.\n",
")"
]
},
{
"cell_type": "markdown",
"id": "77d271b6",
"metadata": {},
"source": [
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d817716b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'LangChain is the framework for building context-aware reasoning applications'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a vector store with a sample text\n",
"from langchain_core.vectorstores import InMemoryVectorStore\n",
"\n",
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
"\n",
"vectorstore = InMemoryVectorStore.from_texts(\n",
"    [text],\n",
"    embedding=embeddings,\n",
")\n",
"\n",
"# Use the vectorstore as a retriever\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
"# Retrieve the most similar text\n",
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
"\n",
"# show the retrieved document's content\n",
"retrieved_documents[0].page_content"
]
},
{
"cell_type": "markdown",
"id": "e02b9855",
"metadata": {},
"source": [
"## Direct Usage\n",
"\n",
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
"\n",
"You can directly call these methods to get embeddings for your own use cases.\n",
"\n",
"### Embed single texts\n",
"\n",
"You can embed single texts or documents with `embed_query`:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0d2befcd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.024642944, 0.029083252, -0.14013672, -0.09082031, 0.058898926, -0.07489014, -0.0138168335, 0.0037\n"
]
}
],
"source": [
"single_vector = embeddings.embed_query(text)\n",
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "1b5a7d03",
"metadata": {},
"source": [
"### Embed multiple texts\n",
"\n",
"You can embed multiple texts with `embed_documents`:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2f4d6e97",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.012771606, 0.023727417, -0.12365723, -0.083740234, 0.06530762, -0.07110596, -0.021896362, -0.0068\n",
"[-0.019058228, 0.04058838, -0.15222168, -0.06842041, -0.012130737, -0.07128906, -0.04534912, 0.00522\n"
]
}
],
"source": [
"text2 = (\n",
"    \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
")\n",
"two_vectors = embeddings.embed_documents([text, text2])\n",
"for vector in two_vectors:\n",
"    print(str(vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "98785c12",
"metadata": {},
"source": [
"## API Reference\n",
"\n",
"For detailed documentation on `NomicEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/nomic/embeddings/langchain_nomic.embeddings.NomicEmbeddings.html).\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
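
The commented-out `dimensionality` knob in the Nomic cell above is worth a concrete illustration. A hedged sketch, assuming `NOMIC_API_KEY` is set and taking the 64-768 range and 768 default from the notebook's own comments:

from langchain_nomic import NomicEmbeddings

text = "LangChain is the framework for building context-aware reasoning applications"

# nomic-embed-text-v1.5 was trained with Matryoshka learning, so the same
# model can emit truncated vectors; dimensionality picks the width (64-768).
small = NomicEmbeddings(model="nomic-embed-text-v1.5", dimensionality=256)
full = NomicEmbeddings(model="nomic-embed-text-v1.5")

print(len(small.embed_query(text)))  # expected: 256
print(len(full.embed_query(text)))   # expected: 768 (model default)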
|
||||
|
||||
@@ -1,272 +1,270 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: OpenAI\n",
|
||||
"keywords: [openaiembeddings]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with OpenAI embedding models using LangChain. For detailed documentation on `OpenAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/openai/embeddings/langchain_openai.embeddings.base.OpenAIEmbeddings.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"OpenAI\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access OpenAI embedding models you'll need to create a/an OpenAI account, get an API key, and install the `langchain-openai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [platform.openai.com](https://platform.openai.com) to sign up to OpenAI and generate an API key. Once you’ve done this set the OPENAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"OPENAI_API_KEY\"):\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain OpenAI integration lives in the `langchain-openai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(\n",
|
||||
" model=\"text-embedding-3-large\",\n",
|
||||
" # With the `text-embedding-3` class\n",
|
||||
" # of models, you can specify the size\n",
|
||||
" # of the embeddings you want returned.\n",
|
||||
" # dimensions=1024\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cells": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: OpenAI\n",
|
||||
"keywords: [openaiembeddings]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.019276829436421394, 0.0037708976306021214, -0.03294256329536438, 0.0037671267054975033, 0.008175\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with OpenAI embedding models using LangChain. For detailed documentation on `OpenAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/openai/embeddings/langchain_openai.embeddings.base.OpenAIEmbeddings.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"OpenAI\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access OpenAI embedding models you'll need to create a/an OpenAI account, get an API key, and install the `langchain-openai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [platform.openai.com](https://platform.openai.com) to sign up to OpenAI and generate an API key. Once you’ve done this set the OPENAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.019260549917817116, 0.0037612367887049913, -0.03291035071015358, 0.003757466096431017, 0.0082049\n",
|
||||
"[-0.010181212797760963, 0.023419594392180443, -0.04215526953339577, -0.001532090245746076, -0.023573\n"
|
||||
]
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"OPENAI_API_KEY\"):\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain OpenAI integration lives in the `langchain-openai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(\n",
|
||||
" model=\"text-embedding-3-large\",\n",
|
||||
" # With the `text-embedding-3` class\n",
|
||||
" # of models, you can specify the size\n",
|
||||
" # of the embeddings you want returned.\n",
|
||||
" # dimensions=1024\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.019276829436421394, 0.0037708976306021214, -0.03294256329536438, 0.0037671267054975033, 0.008175\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.019260549917817116, 0.0037612367887049913, -0.03291035071015358, 0.003757466096431017, 0.0082049\n",
|
||||
"[-0.010181212797760963, 0.023419594392180443, -0.04215526953339577, -0.001532090245746076, -0.023573\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `OpenAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/openai/embeddings/langchain_openai.embeddings.base.OpenAIEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
"nbformat": 4,
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
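The commented-out `dimensions` argument in the OpenAI instantiation cell above is easy to miss: with the `text-embedding-3` model family, the API can natively return shortened vectors, trading some retrieval accuracy for smaller indexes. A minimal sketch, assuming `OPENAI_API_KEY` is set:

```python
from langchain_openai import OpenAIEmbeddings

# Default size for text-embedding-3-large is 3072 dimensions
full = OpenAIEmbeddings(model="text-embedding-3-large")

# Request truncated 256-dimensional vectors instead
small = OpenAIEmbeddings(model="text-embedding-3-large", dimensions=256)

query = "What is LangChain?"
print(len(full.embed_query(query)))   # 3072
print(len(small.embed_query(query)))  # 256
```

Shorter vectors are cheaper to store and faster to search, but benchmark retrieval quality on your own data before committing to a size.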
@@ -133,7 +133,7 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
@@ -244,7 +244,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -141,7 +141,7 @@
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
@@ -252,7 +252,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,277 +1,275 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Together AI\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TogetherEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Together embedding models using LangChain. For detailed documentation on `TogetherEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/together/embeddings/langchain_together.embeddings.TogetherEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Together\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Together embedding models you'll need to create a/an Together account, get an API key, and install the `langchain-together` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://api.together.xyz/](https://api.together.xyz/) to sign up to Together and generate an API key. Once you've done this set the TOGETHER_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"TOGETHER_API_KEY\"):\n",
|
||||
" os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Together integration lives in the `langchain-together` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cells": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-together"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_together import TogetherEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = TogetherEmbeddings(\n",
|
||||
" model=\"togethercomputer/m2-bert-80M-8k-retrieval\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Together AI\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.3812227, -0.052848946, -0.10564975, 0.03480297, 0.2878488, 0.0084609175, 0.11605915, 0.05303011, \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TogetherEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with Together embedding models using LangChain. For detailed documentation on `TogetherEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/together/embeddings/langchain_together.embeddings.TogetherEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Together\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Together embedding models you'll need to create a/an Together account, get an API key, and install the `langchain-together` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://api.together.xyz/](https://api.together.xyz/) to sign up to Together and generate an API key. Once you've done this set the TOGETHER_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.3812227, -0.052848946, -0.10564975, 0.03480297, 0.2878488, 0.0084609175, 0.11605915, 0.05303011, \n",
|
||||
"[0.066308185, -0.032866564, 0.115751594, 0.19082588, 0.14017, -0.26976448, -0.056340694, -0.26923394\n"
|
||||
]
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"TOGETHER_API_KEY\"):\n",
|
||||
" os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Together integration lives in the `langchain-together` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-together"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_together import TogetherEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = TogetherEmbeddings(\n",
|
||||
" model=\"togethercomputer/m2-bert-80M-8k-retrieval\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.3812227, -0.052848946, -0.10564975, 0.03480297, 0.2878488, 0.0084609175, 0.11605915, 0.05303011, \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.3812227, -0.052848946, -0.10564975, 0.03480297, 0.2878488, 0.0084609175, 0.11605915, 0.05303011, \n",
|
||||
"[0.066308185, -0.032866564, 0.115751594, 0.19082588, 0.14017, -0.26976448, -0.056340694, -0.26923394\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `TogetherEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/together/embeddings/langchain_together.embeddings.TogetherEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
"nbformat": 4,
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
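Every LangChain embedding model, `TogetherEmbeddings` included, also inherits async variants (`aembed_query`, `aembed_documents`) from the base `Embeddings` interface, which helps when embedding a query and a document batch concurrently. A sketch, assuming `TOGETHER_API_KEY` is set:

```python
import asyncio

from langchain_together import TogetherEmbeddings

embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval")


async def main() -> None:
    texts = [
        "LangChain is the framework for building context-aware reasoning applications",
        "LangGraph is a library for building stateful, multi-actor applications with LLMs",
    ]
    # Embed the query and the documents concurrently
    query_vec, doc_vecs = await asyncio.gather(
        embeddings.aembed_query("What is LangChain?"),
        embeddings.aembed_documents(texts),
    )
    print(len(query_vec), len(doc_vecs))


asyncio.run(main())
```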
@@ -1,279 +1,277 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: ZhipuAI\n",
|
||||
"keywords: [zhipuaiembeddings]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ZhipuAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with ZhipuAI embedding models using LangChain. For detailed documentation on `ZhipuAIEmbeddings` features and configuration options, please refer to the [API reference](https://bigmodel.cn/dev/api#vector).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Provider | Package |\n",
|
||||
"|:--------:|:-------:|\n",
|
||||
"| [ZhipuAI](/docs/integrations/providers/zhipuai/) | [langchain-community](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html) |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access ZhipuAI embedding models you'll need to create a/an ZhipuAI account, get an API key, and install the `zhipuai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://bigmodel.cn/](https://bigmodel.cn/usercenter/apikeys) to sign up to ZhipuAI and generate an API key. Once you've done this set the ZHIPUAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"ZHIPUAI_API_KEY\"):\n",
|
||||
" os.environ[\"ZHIPUAI_API_KEY\"] = getpass.getpass(\"Enter your ZhipuAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain ZhipuAI integration lives in the `zhipuai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cells": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU zhipuai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import ZhipuAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = ZhipuAIEmbeddings(\n",
|
||||
" model=\"embedding-3\",\n",
|
||||
" # With the `embedding-3` class\n",
|
||||
" # of models, you can specify the size\n",
|
||||
" # of the embeddings you want returned.\n",
|
||||
" # dimensions=1024\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: ZhipuAI\n",
|
||||
"keywords: [zhipuaiembeddings]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, 0.007785797, 0.04598999, 0.012741089, -0.01689148, 0.008277893, 0.016464233, 0.009246\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cell_type": "markdown",
|
||||
"id": "9a3d6f34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ZhipuAIEmbeddings\n",
|
||||
"\n",
|
||||
"This will help you get started with ZhipuAI embedding models using LangChain. For detailed documentation on `ZhipuAIEmbeddings` features and configuration options, please refer to the [API reference](https://bigmodel.cn/dev/api#vector).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Provider | Package |\n",
|
||||
"|:--------:|:-------:|\n",
|
||||
"| [ZhipuAI](/docs/integrations/providers/zhipuai/) | [langchain-community](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html) |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access ZhipuAI embedding models you'll need to create a/an ZhipuAI account, get an API key, and install the `zhipuai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [https://bigmodel.cn/](https://bigmodel.cn/usercenter/apikeys) to sign up to ZhipuAI and generate an API key. Once you've done this set the ZHIPUAI_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, 0.007785797, 0.04598999, 0.012741089, -0.01689148, 0.008277893, 0.016464233, 0.009246\n",
|
||||
"[-0.02330017, -0.013916016, 0.00022411346, 0.017196655, -0.034240723, 0.011131287, 0.011497498, -0.0\n"
|
||||
]
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36521c2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"ZHIPUAI_API_KEY\"):\n",
|
||||
" os.environ[\"ZHIPUAI_API_KEY\"] = getpass.getpass(\"Enter your ZhipuAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c84fb993",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "39a4953b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9664366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain ZhipuAI integration lives in the `zhipuai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "64853226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU zhipuai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45dd1724",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9ea7a09b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import ZhipuAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = ZhipuAIEmbeddings(\n",
|
||||
" model=\"embedding-3\",\n",
|
||||
" # With the `embedding-3` class\n",
|
||||
" # of models, you can specify the size\n",
|
||||
" # of the embeddings you want returned.\n",
|
||||
" # dimensions=1024\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77d271b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d817716b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e02b9855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Direct Usage\n",
|
||||
"\n",
|
||||
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
|
||||
"\n",
|
||||
"You can directly call these methods to get embeddings for your own use cases.\n",
|
||||
"\n",
|
||||
"### Embed single texts\n",
|
||||
"\n",
|
||||
"You can embed single texts or documents with `embed_query`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "0d2befcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, 0.007785797, 0.04598999, 0.012741089, -0.01689148, 0.008277893, 0.016464233, 0.009246\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"single_vector = embeddings.embed_query(text)\n",
|
||||
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5a7d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Embed multiple texts\n",
|
||||
"\n",
|
||||
"You can embed multiple texts with `embed_documents`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2f4d6e97",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-0.022979736, 0.007785797, 0.04598999, 0.012741089, -0.01689148, 0.008277893, 0.016464233, 0.009246\n",
|
||||
"[-0.02330017, -0.013916016, 0.00022411346, 0.017196655, -0.034240723, 0.011131287, 0.011497498, -0.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `ZhipuAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
"nbformat": 4,
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
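Whichever provider you use, re-embedding identical documents on every run wastes API calls. LangChain's `CacheBackedEmbeddings` wrapper memoizes `embed_documents` results in a key-value store; the sketch below wraps the ZhipuAI model from the notebook above, assuming `ZHIPUAI_API_KEY` is set:

```python
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_community.embeddings import ZhipuAIEmbeddings

underlying = ZhipuAIEmbeddings(model="embedding-3")
store = LocalFileStore("./embedding_cache/")

cached = CacheBackedEmbeddings.from_bytes_store(
    underlying,
    store,
    namespace="embedding-3",  # keeps caches for different models separate
)

texts = ["LangChain is the framework for building context-aware reasoning applications"]
cached.embed_documents(texts)  # calls the API and writes vectors to ./embedding_cache/
cached.embed_documents(texts)  # served from the local cache; no API call
```

Note that by default only `embed_documents` is cached; queries still hit the API unless you also configure a query cache.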
@@ -11,13 +11,6 @@
|
||||
"\n",
|
||||
">[Chroma](https://docs.trychroma.com/getting-started) is a AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0. View the full docs of `Chroma` at [this page](https://docs.trychroma.com/reference/py-collection), and find the API reference for the LangChain integration at [this page](https://python.langchain.com/api_reference/chroma/vectorstores/langchain_chroma.vectorstores.Chroma.html).\n",
|
||||
"\n",
|
||||
":::info Chroma Cloud\n",
|
||||
"\n",
|
||||
"Chroma Cloud powers serverless vector and full-text search. It's extremely fast, cost-effective, scalable and painless. Create a DB and try it out in under 30 seconds with $5 of free credits.\n",
|
||||
"\n",
|
||||
"[Get started with Chroma Cloud](https://trychroma.com/signup)\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access `Chroma` vector stores you'll need to install the `langchain-chroma` integration package."
|
||||
@@ -40,15 +33,7 @@
|
||||
"source": [
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"You can use the `Chroma` vector store without any credentials, simply installing the package above is enough!\n",
|
||||
"\n",
|
||||
"If you are a [Chroma Cloud](https://trychroma.com/signup) user, set your `CHROMA_TENANT`, `CHROMA_DATABASE`, and `CHROMA_API_KEY` environment variables.\n",
|
||||
"\n",
|
||||
"When you install the `chromadb` package you also get access to the Chroma CLI, which can set these for you. First, [login](https://docs.trychroma.com/docs/cli/login) via the CLI, and then use the [`connect` command](https://docs.trychroma.com/docs/cli/db):\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"chroma db connect [db_name] --env-file\n",
|
||||
"```"
|
||||
"You can use the `Chroma` vector store without any credentials, simply installing the package above is enough!"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,7 +73,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "d3ed0a9a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -100,19 +85,9 @@
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6a43e25-227c-4e89-909f-3654fe2710fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Running Locally (In-Memory)\n",
|
||||
"\n",
|
||||
"You can get a Chroma server running in memory by simply instantiating a `Chroma` instance with a collection name and your embeddings provider:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 16,
|
||||
"id": "3ea11a7b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -122,104 +97,7 @@
|
||||
"vector_store = Chroma(\n",
|
||||
" collection_name=\"example_collection\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92d04cda-e8cc-48aa-9680-470304e3ff4c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you don't need data persistence, this is a great option for experimenting while building your AI application with Langchain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ad6adc53-4b3f-458e-8e2e-efcc3f99f0c5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Running Locally (with Data Persistence)\n",
|
||||
"\n",
|
||||
"You can provide the `persist_directory` argument to save your data across multiple runs of your program:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a858e77-fd6d-44f0-840f-8f71eaeae6f7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"\n",
|
||||
"vector_store = Chroma(\n",
|
||||
" collection_name=\"example_collection\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" persist_directory=\"./chroma_langchain_db\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "47bf272e-af0b-450e-8a86-3e8292273cde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Connecting to a Chroma Server\n",
|
||||
"\n",
|
||||
"If you have a Chroma server running locally, or you have [deployed](https://docs.trychroma.com/guides/deploy/client-server-mode) one yourself, you can connect to it by providing the `host` argument.\n",
|
||||
"\n",
|
||||
"For example, you can start a Chroma server running locally with `chroma run`, and then connect it with `host='localhost'`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "679d619f-b8ee-4abb-8ac0-77ec859ddff1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"\n",
|
||||
"vector_store = Chroma(\n",
|
||||
" collection_name=\"example_collection\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" host=\"localhost\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3c06ed9-c010-4764-bd6e-2a0c71201d5b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For other deployments you can use the `port`, `ssl`, and `headers` arguments to customize your connection."
|
||||
]
|
||||
},
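A hedged sketch of what such a customized connection could look like — the host, port, and header values here are placeholders for your own deployment, not part of the original notebook:

```python
from langchain_chroma import Chroma

# Hypothetical self-hosted deployment; every connection value below is a
# placeholder to adapt to your own setup.
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,  # `embeddings` as defined in the cells above
    host="chroma.example.com",
    port=8000,
    ssl=True,
    headers={"Authorization": "Bearer <token>"},
)
```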
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f3238e1-ca57-482d-878d-b09bd2c8015c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Chroma Cloud\n",
|
||||
"\n",
|
||||
"Chroma Cloud users can also build with Langchain. Provide your `Chroma` instance with your Chroma Cloud API key, tenant, and DB name:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e080d2d2-c501-467e-9842-e2045d86cdb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"\n",
|
||||
"vector_store = Chroma(\n",
|
||||
" collection_name=\"example_collection\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" chroma_cloud_api_key=os.getenv(\"CHROMA_API_KEY\"),\n",
|
||||
" tenant=os.getenv(\"CHROMA_TENANT\"),\n",
|
||||
" database=os.getenv(\"CHROMA_DATABASE\"),\n",
|
||||
" persist_directory=\"./chroma_langchain_db\", # Where to save data locally, remove if not necessary\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -233,132 +111,21 @@
|
||||
"You can also initialize from a `Chroma` client, which is particularly useful if you want easier access to the underlying database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "38e9f893-60df-4a4f-b570-2d1c463cc1e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Running Locally (In-Memory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "09bfb62f-7c6b-43d3-a69a-0601899c6942",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"client = chromadb.Client()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f3eac2de-0cca-4d57-b67d-04cc78bb59c1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Running Locally (with Data Persistence)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ffc7f2ad-0d6c-4911-a4cf-a82bf7649478",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"client = chromadb.PersistentClient(path=\"./chroma_langchain_db\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "41cc98d5-94f3-4a2f-903e-61c4a38d8f9c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Connecting to a Chroma Server\n",
|
||||
"\n",
|
||||
"For example, if you are running a Chroma server locally (using `chroma run`):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bb5828e3-c0a5-4f97-8d2e-23d82257743e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"client = chromadb.HttpClient(host=\"localhost\", port=8000, ssl=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "254ecfdb-f247-4a3d-a52a-e515b17b7ba2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Chroma Cloud"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fbbf8042-7ae7-4221-96e3-dc2048dd0f45",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"After setting your `CHROMA_API_KEY`, `CHROMA_TENANT`, and `CHROMA_DATABASE`, you can simply instantiate:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89e86a01-a347-4041-a4a1-01eecd299235",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"client = chromadb.CloudClient()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8fdd8bbb-45ab-43d8-bdc1-7220b14cfc52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Access your Chroma DB"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6da21a1a-8d0d-4a4b-bac5-008839e89540",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"collection = client.get_or_create_collection(\"collection_name\")\n",
|
||||
"collection.add(ids=[\"1\", \"2\", \"3\"], documents=[\"a\", \"b\", \"c\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "581906ba-8082-450c-a3c4-19284539980b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Create a Chroma Vectorstore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "3fe4457f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"persistent_client = chromadb.PersistentClient()\n",
|
||||
"collection = persistent_client.get_or_create_collection(\"collection_name\")\n",
|
||||
"collection.add(ids=[\"1\", \"2\", \"3\"], documents=[\"a\", \"b\", \"c\"])\n",
|
||||
"\n",
|
||||
"vector_store_from_client = Chroma(\n",
|
||||
" client=client,\n",
|
||||
" client=persistent_client,\n",
|
||||
" collection_name=\"collection_name\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
")"
|
||||
@@ -380,10 +147,30 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 17,
|
||||
"id": "da279339",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['f22ed484-6db3-4b76-adb1-18a777426cd6',\n",
|
||||
" 'e0d5bab4-6453-4511-9a37-023d9d288faa',\n",
|
||||
" '877d76b8-3580-4d9e-a13f-eed0fa3d134a',\n",
|
||||
" '26eaccab-81ce-4c0a-8e76-bf542647df18',\n",
|
||||
" 'bcaa8239-7986-4050-bf40-e14fb7dab997',\n",
|
||||
" 'cdc44b38-a83f-4e49-b249-7765b334e09d',\n",
|
||||
" 'a7a35354-2687-4bc2-8242-3849a4d18d34',\n",
|
||||
" '8780caf1-d946-4f27-a707-67d037e9e1d8',\n",
|
||||
" 'dec6af2a-7326-408f-893d-7d7d717dfda9',\n",
|
||||
" '3b18e210-bb59-47a0-8e17-c8e51176ea5e']"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
@@ -478,7 +265,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"id": "ef5dbd1e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -514,7 +301,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"id": "56f17791",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -540,10 +327,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"id": "e2b96fcf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
@@ -566,10 +362,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"id": "2768a331",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=1.726390] The stock market is down 500 points today due to fears of a recession. [{'source': 'news'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
|
||||
@@ -590,10 +394,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"id": "8ea434a5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* I had chocolate chip pancakes and fried eggs for breakfast this morning. [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_by_vector(\n",
|
||||
" embedding=embeddings.embed_query(\"I love green eggs and ham!\"), k=1\n",
|
||||
@@ -618,10 +430,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 12,
|
||||
"id": "7b6f7867",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever(\n",
|
||||
" search_type=\"mmr\", search_kwargs={\"k\": 1, \"fetch_k\": 5}\n",
|
||||
@@ -670,7 +493,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.0"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1029,7 +1029,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "Chroma",
|
||||
@@ -1039,10 +1039,10 @@ const FEATURE_TABLES = {
|
||||
searchByVector: true,
|
||||
searchWithScore: true,
|
||||
async: true,
|
||||
passesStandardTests: true,
|
||||
multiTenancy: true,
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "Clickhouse",
|
||||
@@ -1055,7 +1055,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "CouchbaseSearchVectorStore",
|
||||
@@ -1081,7 +1081,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: false,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "ElasticsearchStore",
|
||||
@@ -1094,7 +1094,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "FAISS",
|
||||
@@ -1107,7 +1107,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "InMemoryVectorStore",
|
||||
@@ -1120,7 +1120,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "Milvus",
|
||||
@@ -1146,7 +1146,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "openGauss",
|
||||
@@ -1172,7 +1172,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "PineconeVectorStore",
|
||||
@@ -1185,7 +1185,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "QdrantVectorStore",
|
||||
@@ -1211,7 +1211,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "Weaviate",
|
||||
@@ -1224,7 +1224,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: true,
|
||||
local: true,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
{
|
||||
name: "SQLServer",
|
||||
@@ -1237,7 +1237,7 @@ const FEATURE_TABLES = {
|
||||
passesStandardTests: false,
|
||||
multiTenancy: false,
|
||||
local: false,
|
||||
idsInAddDocuments: true,
|
||||
idsInAddDocuments: false,
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ The interfaces for core components like chat models, LLMs, vector stores, retrie
|
||||
and more are defined here. The universal invocation protocol (Runnables) along with
|
||||
a syntax for combining components (LangChain Expression Language) are also defined here.
|
||||
|
||||
**No third-party integrations are defined here.** The dependencies are kept purposefully
|
||||
No third-party integrations are defined here. The dependencies are kept purposefully
|
||||
very lightweight.
|
||||
"""
|
||||
|
||||
|
||||
@@ -111,9 +111,8 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
|
||||
def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
|
||||
"""Format messages for tracing in on_chat_model_start.
|
||||
|
||||
- Update image content blocks to OpenAI Chat Completions format (backward
|
||||
compatibility).
|
||||
- Add "type" key to content blocks that have a single key.
|
||||
For backward compatibility, we update image content blocks to OpenAI Chat
|
||||
Completions format.
|
||||
|
||||
Args:
|
||||
messages: List of messages to format.
|
||||
@@ -126,36 +125,20 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
|
||||
message_to_trace = message
|
||||
if isinstance(message.content, list):
|
||||
for idx, block in enumerate(message.content):
|
||||
if isinstance(block, dict):
|
||||
# Update image content blocks to OpenAI Chat Completions format.
|
||||
if (
|
||||
block.get("type") == "image"
|
||||
and is_data_content_block(block)
|
||||
and block.get("source_type") != "id"
|
||||
):
|
||||
if message_to_trace is message:
|
||||
# Shallow copy
|
||||
message_to_trace = message.model_copy()
|
||||
message_to_trace.content = list(message_to_trace.content)
|
||||
if (
|
||||
isinstance(block, dict)
|
||||
and block.get("type") == "image"
|
||||
and is_data_content_block(block)
|
||||
and block.get("source_type") != "id"
|
||||
):
|
||||
if message_to_trace is message:
|
||||
message_to_trace = message.model_copy()
|
||||
# Also shallow-copy content
|
||||
message_to_trace.content = list(message_to_trace.content)
|
||||
|
||||
message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy
|
||||
convert_to_openai_image_block(block)
|
||||
)
|
||||
elif len(block) == 1 and "type" not in block:
|
||||
# Tracing assumes all content blocks have a "type" key. Here
|
||||
# we add this key if it is missing, and there's an obvious
|
||||
# choice for the type (e.g., a single key in the block).
|
||||
if message_to_trace is message:
|
||||
# Shallow copy
|
||||
message_to_trace = message.model_copy()
|
||||
message_to_trace.content = list(message_to_trace.content)
|
||||
key = next(iter(block))
|
||||
message_to_trace.content[idx] = { # type: ignore[index]
|
||||
"type": key,
|
||||
key: block[key],
|
||||
}
|
||||
else:
|
||||
pass
|
||||
message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy
|
||||
convert_to_openai_image_block(block)
|
||||
)
|
||||
messages_to_trace.append(message_to_trace)
|
||||
|
||||
return messages_to_trace
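A self-contained sketch of the single-key normalization this hunk touches; the `cachePoint` block mirrors the test case that appears later in this diff:

```python
# Blocks with a single key and no "type" get that key promoted to "type".
block = {"cachePoint": {"type": "default"}}
if len(block) == 1 and "type" not in block:
    key = next(iter(block))
    block = {"type": key, key: block[key]}
print(block)  # {'type': 'cachePoint', 'cachePoint': {'type': 'default'}}
```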
|
||||
|
||||
@@ -230,7 +230,6 @@ def ensure_config(config: Optional[RunnableConfig] = None) -> RunnableConfig:
|
||||
not key.startswith("__")
|
||||
and isinstance(value, (str, int, float, bool))
|
||||
and key not in empty["metadata"]
|
||||
and key != "api_key"
|
||||
):
|
||||
empty["metadata"][key] = value
|
||||
return empty
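A standalone sketch of the metadata filter in this hunk (the context values are invented for illustration):

```python
context = {"user_id": "u-123", "api_key": "secret", "__internal": object(), "retries": 3}
metadata = {
    key: value
    for key, value in context.items()
    if not key.startswith("__")
    and isinstance(value, (str, int, float, bool))
    and key != "api_key"
}
print(metadata)  # {'user_id': 'u-123', 'retries': 3}
```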
|
||||
|
||||
@@ -23,12 +23,7 @@ if TYPE_CHECKING:
|
||||
from langchain_core.utils.iter import batch_iterate
|
||||
from langchain_core.utils.loading import try_load_from_hub
|
||||
from langchain_core.utils.pydantic import pre_init
|
||||
from langchain_core.utils.strings import (
|
||||
comma_list,
|
||||
sanitize_for_postgres,
|
||||
stringify_dict,
|
||||
stringify_value,
|
||||
)
|
||||
from langchain_core.utils.strings import comma_list, stringify_dict, stringify_value
|
||||
from langchain_core.utils.utils import (
|
||||
build_extra_kwargs,
|
||||
check_package_version,
|
||||
@@ -64,7 +59,6 @@ __all__ = (
|
||||
"pre_init",
|
||||
"print_text",
|
||||
"raise_for_status_with_text",
|
||||
"sanitize_for_postgres",
|
||||
"secret_from_env",
|
||||
"stringify_dict",
|
||||
"stringify_value",
|
||||
@@ -87,7 +81,6 @@ _dynamic_imports = {
|
||||
"try_load_from_hub": "loading",
|
||||
"pre_init": "pydantic",
|
||||
"comma_list": "strings",
|
||||
"sanitize_for_postgres": "strings",
|
||||
"stringify_dict": "strings",
|
||||
"stringify_value": "strings",
|
||||
"build_extra_kwargs": "utils",
|
||||
|
||||
@@ -55,7 +55,7 @@ def _dereference_refs_helper(
|
||||
processed_refs = set()
|
||||
|
||||
# 1) Pure $ref node?
|
||||
if isinstance(obj, dict) and "$ref" in set(obj.keys()):
|
||||
if isinstance(obj, dict) and set(obj.keys()) == {"$ref"}:
|
||||
ref_path = obj["$ref"]
|
||||
# cycle?
|
||||
if ref_path in processed_refs:
|
||||
|
||||
@@ -46,26 +46,3 @@ def comma_list(items: list[Any]) -> str:
|
||||
str: The comma-separated string.
|
||||
"""
|
||||
return ", ".join(str(item) for item in items)
|
||||
|
||||
|
||||
def sanitize_for_postgres(text: str, replacement: str = "") -> str:
|
||||
r"""Sanitize text by removing NUL bytes that are incompatible with PostgreSQL.
|
||||
|
||||
PostgreSQL text fields cannot contain NUL (0x00) bytes, which can cause
|
||||
psycopg.DataError when inserting documents. This function removes or replaces
|
||||
such characters to ensure compatibility.
|
||||
|
||||
Args:
|
||||
text: The text to sanitize.
|
||||
replacement: String to replace NUL bytes with. Defaults to empty string.
|
||||
|
||||
Returns:
|
||||
str: The sanitized text with NUL bytes removed or replaced.
|
||||
|
||||
Example:
|
||||
>>> sanitize_for_postgres("Hello\\x00world")
|
||||
'Helloworld'
|
||||
>>> sanitize_for_postgres("Hello\\x00world", " ")
|
||||
'Hello world'
|
||||
"""
|
||||
return text.replace("\x00", replacement)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""langchain-core version information and utilities."""
|
||||
|
||||
VERSION = "0.3.72"
|
||||
VERSION = "0.3.70"
|
||||
|
||||
@@ -16,7 +16,7 @@ dependencies = [
|
||||
"pydantic>=2.7.4",
|
||||
]
|
||||
name = "langchain-core"
|
||||
version = "0.3.72"
|
||||
version = "0.3.70"
|
||||
description = "Building applications with LLMs through composability"
|
||||
readme = "README.md"
|
||||
|
||||
|
||||
@@ -467,55 +467,6 @@ def test_trace_images_in_openai_format() -> None:
|
||||
]
|
||||
|
||||
|
||||
def test_trace_content_blocks_with_no_type_key() -> None:
|
||||
"""Test that we add a ``type`` key to certain content blocks that don't have one."""
|
||||
llm = ParrotFakeChatModel()
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Hello",
|
||||
},
|
||||
{
|
||||
"cachePoint": {"type": "default"},
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
tracer = FakeChatModelStartTracer()
|
||||
response = llm.invoke(messages, config={"callbacks": [tracer]})
|
||||
assert tracer.messages == [
|
||||
[
|
||||
[
|
||||
HumanMessage(
|
||||
[
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Hello",
|
||||
},
|
||||
{
|
||||
"type": "cachePoint",
|
||||
"cachePoint": {"type": "default"},
|
||||
},
|
||||
]
|
||||
)
|
||||
]
|
||||
]
|
||||
]
|
||||
# Test no mutation
|
||||
assert response.content == [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Hello",
|
||||
},
|
||||
{
|
||||
"cachePoint": {"type": "default"},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def test_extend_support_to_openai_multimodal_formats() -> None:
|
||||
"""Test that chat models normalize OpenAI file and audio inputs."""
|
||||
llm = ParrotFakeChatModel()
|
||||
|
||||
@@ -27,7 +27,6 @@ EXPECTED_ALL = [
|
||||
"pre_init",
|
||||
"from_env",
|
||||
"secret_from_env",
|
||||
"sanitize_for_postgres",
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
"""Test string utilities."""
|
||||
|
||||
from langchain_core.utils.strings import (
|
||||
comma_list,
|
||||
sanitize_for_postgres,
|
||||
stringify_dict,
|
||||
stringify_value,
|
||||
)
|
||||
|
||||
|
||||
def test_sanitize_for_postgres() -> None:
|
||||
"""Test sanitizing text for PostgreSQL compatibility."""
|
||||
# Test with NUL bytes
|
||||
text_with_nul = "Hello\x00world\x00test"
|
||||
expected = "Helloworldtest"
|
||||
assert sanitize_for_postgres(text_with_nul) == expected
|
||||
|
||||
# Test with replacement character
|
||||
expected_with_replacement = "Hello world test"
|
||||
assert sanitize_for_postgres(text_with_nul, " ") == expected_with_replacement
|
||||
|
||||
# Test with text without NUL bytes
|
||||
clean_text = "Hello world"
|
||||
assert sanitize_for_postgres(clean_text) == clean_text
|
||||
|
||||
# Test empty string
|
||||
assert sanitize_for_postgres("") == ""
|
||||
|
||||
# Test with multiple consecutive NUL bytes
|
||||
text_with_multiple_nuls = "Hello\x00\x00\x00world"
|
||||
assert sanitize_for_postgres(text_with_multiple_nuls) == "Helloworld"
|
||||
assert sanitize_for_postgres(text_with_multiple_nuls, "-") == "Hello---world"
|
||||
|
||||
|
||||
def test_existing_string_functions() -> None:
|
||||
"""Test existing string functions still work."""
|
||||
# Test comma_list
|
||||
assert comma_list([1, 2, 3]) == "1, 2, 3"
|
||||
assert comma_list(["a", "b", "c"]) == "a, b, c"
|
||||
|
||||
# Test stringify_value
|
||||
assert stringify_value("hello") == "hello"
|
||||
assert stringify_value(42) == "42"
|
||||
|
||||
# Test stringify_dict
|
||||
data = {"key": "value", "number": 123}
|
||||
result = stringify_dict(data)
|
||||
assert "key: value" in result
|
||||
assert "number: 123" in result
|
||||
2
libs/core/uv.lock
generated
2
libs/core/uv.lock
generated
@@ -987,7 +987,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "0.3.72"
|
||||
version = "0.3.70"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
|
||||
@@ -357,7 +357,7 @@ def __getattr__(name: str) -> Any:
|
||||
|
||||
return ElasticVectorSearch
|
||||
# For backwards compatibility
|
||||
if name in {"SerpAPIChain", "SerpAPIWrapper"}:
|
||||
if name == "SerpAPIChain" or name == "SerpAPIWrapper":
|
||||
from langchain_community.utilities import SerpAPIWrapper
|
||||
|
||||
_warn_on_import(
|
||||
|
||||
@@ -106,12 +106,11 @@ def create_importer(
|
||||
"<https://python.langchain.com/docs/versions/v0_2/>"
|
||||
),
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
msg = f"module {new_module} has no attribute {name}"
|
||||
raise AttributeError(msg) from e
|
||||
|
||||
return result
|
||||
|
||||
if fallback_module:
|
||||
try:
|
||||
module = importlib.import_module(fallback_module)
|
||||
@@ -140,13 +139,12 @@ def create_importer(
|
||||
"<https://python.langchain.com/docs/versions/v0_2/>"
|
||||
),
|
||||
)
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
msg = f"module {fallback_module} has no attribute {name}"
|
||||
raise AttributeError(msg) from e
|
||||
|
||||
return result
|
||||
|
||||
msg = f"module {package} has no attribute {name}"
|
||||
raise AttributeError(msg)
|
||||
|
||||
|
||||
@@ -1380,7 +1380,7 @@ class AgentExecutor(Chain):
|
||||
observation = self.handle_parsing_errors(e)
|
||||
else:
|
||||
msg = "Got unexpected type of `handle_parsing_errors`"
|
||||
raise ValueError(msg) from e # noqa: TRY004
|
||||
raise ValueError(msg) from e
|
||||
output = AgentAction("_Exception", observation, text)
|
||||
if run_manager:
|
||||
run_manager.on_agent_action(output, color="green")
|
||||
@@ -1519,7 +1519,7 @@ class AgentExecutor(Chain):
|
||||
observation = self.handle_parsing_errors(e)
|
||||
else:
|
||||
msg = "Got unexpected type of `handle_parsing_errors`"
|
||||
raise ValueError(msg) from e # noqa: TRY004
|
||||
raise ValueError(msg) from e
|
||||
output = AgentAction("_Exception", observation, text)
|
||||
tool_run_kwargs = self._action_agent.tool_run_logging_kwargs()
|
||||
observation = await ExceptionTool().arun(
|
||||
|
||||
@@ -55,7 +55,7 @@ class ChatAgent(Agent):
|
||||
agent_scratchpad = super()._construct_scratchpad(intermediate_steps)
|
||||
if not isinstance(agent_scratchpad, str):
|
||||
msg = "agent_scratchpad should be of type string."
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
if agent_scratchpad:
|
||||
return (
|
||||
f"This was your previous work "
|
||||
|
||||
@@ -40,13 +40,13 @@ def format_xml(
|
||||
# Escape XML tags in tool names and inputs using custom delimiters
|
||||
tool = _escape(action.tool)
|
||||
tool_input = _escape(str(action.tool_input))
|
||||
observation_ = _escape(str(observation))
|
||||
observation = _escape(str(observation))
|
||||
else:
|
||||
tool = action.tool
|
||||
tool_input = str(action.tool_input)
|
||||
observation_ = str(observation)
|
||||
observation = str(observation)
|
||||
log += (
|
||||
f"<tool>{tool}</tool><tool_input>{tool_input}"
|
||||
f"</tool_input><observation>{observation_}</observation>"
|
||||
f"</tool_input><observation>{observation}</observation>"
|
||||
)
|
||||
return log
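A minimal sketch of the log line the function assembles; the tool name, input, and observation are invented:

```python
tool, tool_input, observation = "search", "weather in SF", "sunny"
log = (
    f"<tool>{tool}</tool><tool_input>{tool_input}"
    f"</tool_input><observation>{observation}</observation>"
)
print(log)
# <tool>search</tool><tool_input>weather in SF</tool_input><observation>sunny</observation>
```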
|
||||
|
||||
@@ -358,12 +358,12 @@ class OpenAIAssistantRunnable(RunnableSerializable[dict, OutputType]):
|
||||
run = self._wait_for_run(run.id, run.thread_id)
|
||||
except BaseException as e:
|
||||
run_manager.on_chain_error(e)
|
||||
raise
|
||||
raise e
|
||||
try:
|
||||
response = self._get_response(run)
|
||||
except BaseException as e:
|
||||
run_manager.on_chain_error(e, metadata=run.dict())
|
||||
raise
|
||||
raise e
|
||||
else:
|
||||
run_manager.on_chain_end(response)
|
||||
return response
|
||||
@@ -494,12 +494,12 @@ class OpenAIAssistantRunnable(RunnableSerializable[dict, OutputType]):
|
||||
run = await self._await_for_run(run.id, run.thread_id)
|
||||
except BaseException as e:
|
||||
run_manager.on_chain_error(e)
|
||||
raise
|
||||
raise e
|
||||
try:
|
||||
response = self._get_response(run)
|
||||
except BaseException as e:
|
||||
run_manager.on_chain_error(e, metadata=run.dict())
|
||||
raise
|
||||
raise e
|
||||
else:
|
||||
run_manager.on_chain_end(response)
|
||||
return response
|
||||
@@ -582,7 +582,7 @@ class OpenAIAssistantRunnable(RunnableSerializable[dict, OutputType]):
|
||||
major_version = int(openai.version.VERSION.split(".")[0])
|
||||
minor_version = int(openai.version.VERSION.split(".")[1])
|
||||
version_gte_1_14 = (major_version > 1) or (
|
||||
major_version == 1 and minor_version >= 14 # noqa: PLR2004
|
||||
major_version == 1 and minor_version >= 14
|
||||
)
|
||||
|
||||
messages = self.client.beta.threads.messages.list(
|
||||
@@ -739,7 +739,7 @@ class OpenAIAssistantRunnable(RunnableSerializable[dict, OutputType]):
|
||||
major_version = int(openai.version.VERSION.split(".")[0])
|
||||
minor_version = int(openai.version.VERSION.split(".")[1])
|
||||
version_gte_1_14 = (major_version > 1) or (
|
||||
major_version == 1 and minor_version >= 14 # noqa: PLR2004
|
||||
major_version == 1 and minor_version >= 14
|
||||
)
|
||||
|
||||
messages = await self.async_client.beta.threads.messages.list(
|
||||
|
||||
@@ -87,7 +87,7 @@ class OpenAIFunctionsAgentOutputParser(AgentOutputParser):
|
||||
) -> Union[AgentAction, AgentFinish]:
|
||||
if not isinstance(result[0], ChatGeneration):
|
||||
msg = "This output parser only works on ChatGeneration output"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
message = result[0].message
|
||||
return self._parse_ai_message(message)
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ class OpenAIToolsAgentOutputParser(MultiActionAgentOutputParser):
|
||||
) -> Union[list[AgentAction], AgentFinish]:
|
||||
if not isinstance(result[0], ChatGeneration):
|
||||
msg = "This output parser only works on ChatGeneration output"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
message = result[0].message
|
||||
return parse_ai_message_to_openai_tool_action(message)
|
||||
|
||||
|
||||
@@ -98,7 +98,7 @@ class ToolsAgentOutputParser(MultiActionAgentOutputParser):
|
||||
) -> Union[list[AgentAction], AgentFinish]:
|
||||
if not isinstance(result[0], ChatGeneration):
|
||||
msg = "This output parser only works on ChatGeneration output"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
message = result[0].message
|
||||
return parse_ai_message_to_tool_action(message)
|
||||
|
||||
|
||||
@@ -24,9 +24,6 @@ if TYPE_CHECKING:
|
||||
from langchain_community.docstore.base import Docstore
|
||||
|
||||
|
||||
_LOOKUP_AND_SEARCH_TOOLS = {"Lookup", "Search"}
|
||||
|
||||
|
||||
@deprecated(
|
||||
"0.1.0",
|
||||
message=AGENT_DEPRECATION_WARNING,
|
||||
@@ -55,11 +52,11 @@ class ReActDocstoreAgent(Agent):
|
||||
def _validate_tools(cls, tools: Sequence[BaseTool]) -> None:
|
||||
validate_tools_single_input(cls.__name__, tools)
|
||||
super()._validate_tools(tools)
|
||||
if len(tools) != len(_LOOKUP_AND_SEARCH_TOOLS):
|
||||
if len(tools) != 2:
|
||||
msg = f"Exactly two tools must be specified, but got {tools}"
|
||||
raise ValueError(msg)
|
||||
tool_names = {tool.name for tool in tools}
|
||||
if tool_names != _LOOKUP_AND_SEARCH_TOOLS:
|
||||
if tool_names != {"Lookup", "Search"}:
|
||||
msg = f"Tool names should be Lookup and Search, got {tool_names}"
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ class StructuredChatAgent(Agent):
|
||||
agent_scratchpad = super()._construct_scratchpad(intermediate_steps)
|
||||
if not isinstance(agent_scratchpad, str):
|
||||
msg = "agent_scratchpad should be of type string."
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
if agent_scratchpad:
|
||||
return (
|
||||
f"This was your previous work "
|
||||
|
||||
@@ -71,8 +71,12 @@ class StructuredChatOutputParserWithRetries(AgentOutputParser):
|
||||
def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
|
||||
try:
|
||||
if self.output_fixing_parser is not None:
|
||||
return self.output_fixing_parser.parse(text)
|
||||
return self.base_parser.parse(text)
|
||||
parsed_obj: Union[AgentAction, AgentFinish] = (
|
||||
self.output_fixing_parser.parse(text)
|
||||
)
|
||||
else:
|
||||
parsed_obj = self.base_parser.parse(text)
|
||||
return parsed_obj
|
||||
except Exception as e:
|
||||
msg = f"Could not parse LLM output: {text}"
|
||||
raise OutputParserException(msg) from e
|
||||
|
||||
@@ -47,8 +47,8 @@ def _check_in_allowed_domain(url: str, limit_to_domains: Sequence[str]) -> bool:
|
||||
scheme, domain = _extract_scheme_and_domain(url)
|
||||
|
||||
for allowed_domain in limit_to_domains:
|
||||
allowed_scheme, allowed_domain_ = _extract_scheme_and_domain(allowed_domain)
|
||||
if scheme == allowed_scheme and domain == allowed_domain_:
|
||||
allowed_scheme, allowed_domain = _extract_scheme_and_domain(allowed_domain)
|
||||
if scheme == allowed_scheme and domain == allowed_domain:
|
||||
return True
|
||||
return False
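A self-contained approximation of the exact scheme-and-domain match this hunk implements — `_extract_scheme_and_domain` is stood in for with `urllib.parse`, and the URLs are invented:

```python
from urllib.parse import urlparse

def check_in_allowed_domain(url: str, limit_to_domains: list[str]) -> bool:
    parsed = urlparse(url)
    for allowed in limit_to_domains:
        allowed_parsed = urlparse(allowed)
        if parsed.scheme == allowed_parsed.scheme and parsed.netloc == allowed_parsed.netloc:
            return True
    return False

print(check_in_allowed_domain("https://api.example.com/v1", ["https://api.example.com"]))  # True
print(check_in_allowed_domain("http://api.example.com/v1", ["https://api.example.com"]))   # False
```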
|
||||
|
||||
|
||||
@@ -174,7 +174,7 @@ class Chain(RunnableSerializable[dict[str, Any], dict[str, Any]], ABC):
|
||||
)
|
||||
except BaseException as e:
|
||||
run_manager.on_chain_error(e)
|
||||
raise
|
||||
raise e
|
||||
run_manager.on_chain_end(outputs)
|
||||
|
||||
if include_run_info:
|
||||
@@ -228,7 +228,7 @@ class Chain(RunnableSerializable[dict[str, Any], dict[str, Any]], ABC):
|
||||
)
|
||||
except BaseException as e:
|
||||
await run_manager.on_chain_error(e)
|
||||
raise
|
||||
raise e
|
||||
await run_manager.on_chain_end(outputs)
|
||||
|
||||
if include_run_info:
|
||||
|
||||
@@ -193,12 +193,13 @@ class MapReduceDocumentsChain(BaseCombineDocumentsChain):
|
||||
"multiple llm_chain input_variables"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
elif values["document_variable_name"] not in llm_chain_variables:
|
||||
msg = (
|
||||
f"document_variable_name {values['document_variable_name']} was "
|
||||
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
else:
|
||||
if values["document_variable_name"] not in llm_chain_variables:
|
||||
msg = (
|
||||
f"document_variable_name {values['document_variable_name']} was "
|
||||
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
return values
|
||||
|
||||
@property
|
||||
|
||||
@@ -127,7 +127,7 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
||||
"Output parser of llm_chain should be a RegexParser,"
|
||||
f" got {output_parser}"
|
||||
)
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
output_keys = output_parser.output_keys
|
||||
if self.rank_key not in output_keys:
|
||||
msg = (
|
||||
@@ -161,12 +161,13 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
|
||||
"multiple llm_chain input_variables"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
elif values["document_variable_name"] not in llm_chain_variables:
|
||||
msg = (
|
||||
f"document_variable_name {values['document_variable_name']} was "
|
||||
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
else:
|
||||
if values["document_variable_name"] not in llm_chain_variables:
|
||||
msg = (
|
||||
f"document_variable_name {values['document_variable_name']} was "
|
||||
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
return values
|
||||
|
||||
def combine_docs(
|
||||
|
||||
@@ -325,10 +325,10 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
||||
_token_max,
|
||||
**kwargs,
|
||||
)
|
||||
result_docs = [
|
||||
collapse_docs(docs_, _collapse_docs_func, **kwargs)
|
||||
for docs_ in new_result_doc_list
|
||||
]
|
||||
result_docs = []
|
||||
for docs in new_result_doc_list:
|
||||
new_doc = collapse_docs(docs, _collapse_docs_func, **kwargs)
|
||||
result_docs.append(new_doc)
|
||||
num_tokens = length_func(result_docs, **kwargs)
|
||||
retries += 1
|
||||
if self.collapse_max_retries and retries == self.collapse_max_retries:
|
||||
@@ -364,10 +364,10 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
||||
_token_max,
|
||||
**kwargs,
|
||||
)
|
||||
result_docs = [
|
||||
await acollapse_docs(docs_, _collapse_docs_func, **kwargs)
|
||||
for docs_ in new_result_doc_list
|
||||
]
|
||||
result_docs = []
|
||||
for docs in new_result_doc_list:
|
||||
new_doc = await acollapse_docs(docs, _collapse_docs_func, **kwargs)
|
||||
result_docs.append(new_doc)
|
||||
num_tokens = length_func(result_docs, **kwargs)
|
||||
retries += 1
|
||||
if self.collapse_max_retries and retries == self.collapse_max_retries:
|
||||
|
||||
@@ -140,12 +140,13 @@ class RefineDocumentsChain(BaseCombineDocumentsChain):
|
||||
"multiple llm_chain input_variables"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
elif values["document_variable_name"] not in llm_chain_variables:
|
||||
msg = (
|
||||
f"document_variable_name {values['document_variable_name']} was "
|
||||
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
else:
|
||||
if values["document_variable_name"] not in llm_chain_variables:
|
||||
msg = (
|
||||
f"document_variable_name {values['document_variable_name']} was "
|
||||
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
return values
|
||||
|
||||
def combine_docs(
|
||||
|
||||
@@ -180,12 +180,13 @@ class StuffDocumentsChain(BaseCombineDocumentsChain):
|
||||
"multiple llm_chain_variables"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
elif values["document_variable_name"] not in llm_chain_variables:
|
||||
msg = (
|
||||
f"document_variable_name {values['document_variable_name']} was "
|
||||
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
else:
|
||||
if values["document_variable_name"] not in llm_chain_variables:
|
||||
msg = (
|
||||
f"document_variable_name {values['document_variable_name']} was "
|
||||
f"not found in llm_chain input_variables: {llm_chain_variables}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
return values
|
||||
|
||||
@property
|
||||
|
||||
@@ -57,7 +57,7 @@ def _get_chat_history(chat_history: list[CHAT_TURN_TYPE]) -> str:
|
||||
f"Unsupported chat history format: {type(dialogue_turn)}."
|
||||
f" Full chat history: {chat_history} "
|
||||
)
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
return buffer
|
||||
|
||||
|
||||
|
||||
@@ -164,13 +164,12 @@ class ElasticsearchDatabaseChain(Chain):
|
||||
chain_result: dict[str, Any] = {self.output_key: final_result}
|
||||
if self.return_intermediate_steps:
|
||||
chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps
|
||||
return chain_result
|
||||
except Exception as exc:
|
||||
# Append intermediate steps to exception, to aid in logging and later
|
||||
# improvement of few shot prompt seeds
|
||||
exc.intermediate_steps = intermediate_steps # type: ignore[attr-defined]
|
||||
raise
|
||||
|
||||
return chain_result
|
||||
raise exc
|
||||
|
||||
@property
|
||||
def _chain_type(self) -> str:
|
||||
|
||||
@@ -251,7 +251,7 @@ class LLMChain(Chain):
|
||||
response = self.generate(input_list, run_manager=run_manager)
|
||||
except BaseException as e:
|
||||
run_manager.on_chain_error(e)
|
||||
raise
|
||||
raise e
|
||||
outputs = self.create_outputs(response)
|
||||
run_manager.on_chain_end({"outputs": outputs})
|
||||
return outputs
|
||||
@@ -276,7 +276,7 @@ class LLMChain(Chain):
|
||||
response = await self.agenerate(input_list, run_manager=run_manager)
|
||||
except BaseException as e:
|
||||
await run_manager.on_chain_error(e)
|
||||
raise
|
||||
raise e
|
||||
outputs = self.create_outputs(response)
|
||||
await run_manager.on_chain_end({"outputs": outputs})
|
||||
return outputs
|
||||
|
||||
@@ -117,7 +117,7 @@ def _load_stuff_documents_chain(config: dict, **kwargs: Any) -> StuffDocumentsCh
|
||||
|
||||
if not isinstance(llm_chain, LLMChain):
|
||||
msg = f"Expected LLMChain, got {llm_chain}"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
|
||||
if "document_prompt" in config:
|
||||
prompt_config = config.pop("document_prompt")
|
||||
@@ -150,7 +150,7 @@ def _load_map_reduce_documents_chain(
|
||||
|
||||
if not isinstance(llm_chain, LLMChain):
|
||||
msg = f"Expected LLMChain, got {llm_chain}"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
|
||||
if "reduce_documents_chain" in config:
|
||||
reduce_documents_chain = load_chain_from_config(
|
||||
|
||||
@@ -322,7 +322,7 @@ class Crawler:
|
||||
|
||||
if node_name == "#text" and ancestor_exception and ancestor_node:
|
||||
text = strings[node_value[index]]
|
||||
if text in {"|", "•"}:
|
||||
if text == "|" or text == "•":
|
||||
continue
|
||||
ancestor_node.append({"type": "type", "value": text})
|
||||
else:
|
||||
@@ -367,7 +367,7 @@ class Crawler:
|
||||
element_node_value = strings[text_index]
|
||||
|
||||
# remove redundant elements
|
||||
if ancestor_exception and (node_name not in {"a", "button"}):
|
||||
if ancestor_exception and (node_name != "a" and node_name != "button"):
|
||||
continue
|
||||
|
||||
elements_in_view_port.append(
|
||||
@@ -423,7 +423,10 @@ class Crawler:
|
||||
# not very elegant, more like a placeholder
|
||||
if (
|
||||
(converted_node_name != "button" or meta == "")
|
||||
and converted_node_name not in {"link", "input", "img", "textarea"}
|
||||
and converted_node_name != "link"
|
||||
and converted_node_name != "input"
|
||||
and converted_node_name != "img"
|
||||
and converted_node_name != "textarea"
|
||||
) and inner_text.strip() == "":
|
||||
continue
|
||||
|
||||
|
||||
@@ -123,10 +123,7 @@ def create_citation_fuzzy_match_chain(llm: BaseLanguageModel) -> LLMChain:
|
||||
Chain (LLMChain) that can be used to answer questions with citations.
|
||||
"""
|
||||
output_parser = PydanticOutputFunctionsParser(pydantic_schema=QuestionAnswer)
|
||||
if hasattr(QuestionAnswer, "model_json_schema"):
|
||||
schema = QuestionAnswer.model_json_schema()
|
||||
else:
|
||||
schema = QuestionAnswer.schema()
|
||||
schema = QuestionAnswer.schema()
|
||||
function = {
|
||||
"name": schema["title"],
|
||||
"description": schema["description"],
|
||||
|
||||
@@ -51,12 +51,13 @@ def _format_url(url: str, path_params: dict) -> str:
|
||||
sep = ","
|
||||
new_val = ""
|
||||
new_val += sep.join(kv_strs)
|
||||
elif param[0] == ".":
|
||||
new_val = f".{val}"
|
||||
elif param[0] == ";":
|
||||
new_val = f";{clean_param}={val}"
|
||||
else:
|
||||
new_val = val
|
||||
if param[0] == ".":
|
||||
new_val = f".{val}"
|
||||
elif param[0] == ";":
|
||||
new_val = f";{clean_param}={val}"
|
||||
else:
|
||||
new_val = val
|
||||
new_params[param] = new_val
|
||||
return url.format(**new_params)
|
||||
|
||||
@@ -223,7 +224,7 @@ class SimpleRequestChain(Chain):
|
||||
_text = f"Calling endpoint {_pretty_name} with arguments:\n" + _pretty_args
|
||||
_run_manager.on_text(_text)
|
||||
api_response: Response = self.request_method(name, args)
|
||||
if api_response.status_code != requests.codes.ok:
|
||||
if api_response.status_code != 200:
|
||||
response = (
|
||||
f"{api_response.status_code}: {api_response.reason}"
|
||||
f"\nFor {name} "
|
||||
@@ -360,13 +361,13 @@ def get_openapi_chain(
|
||||
try:
|
||||
spec = conversion(spec)
|
||||
break
|
||||
except ImportError:
|
||||
raise
|
||||
except ImportError as e:
|
||||
raise e
|
||||
except Exception: # noqa: S110
|
||||
pass
|
||||
if isinstance(spec, str):
|
||||
msg = f"Unable to parse spec from source {spec}"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
openai_fns, call_api_fn = openapi_spec_to_openai_fn(spec)
|
||||
if not llm:
|
||||
msg = (
|
||||
|
||||
@@ -125,7 +125,7 @@ class LLMRouterChain(RouterChain):
|
||||
def _validate_outputs(self, outputs: dict[str, Any]) -> None:
|
||||
super()._validate_outputs(outputs)
|
||||
if not isinstance(outputs["next_inputs"], dict):
|
||||
raise ValueError # noqa: TRY004
|
||||
raise ValueError
|
||||
|
||||
def _call(
|
||||
self,
|
||||
@@ -178,10 +178,10 @@ class RouterOutputParser(BaseOutputParser[dict[str, str]]):
|
||||
parsed = parse_and_check_json_markdown(text, expected_keys)
|
||||
if not isinstance(parsed["destination"], str):
|
||||
msg = "Expected 'destination' to be a string."
|
||||
raise TypeError(msg)
|
||||
raise ValueError(msg)
|
||||
if not isinstance(parsed["next_inputs"], self.next_inputs_type):
|
||||
msg = f"Expected 'next_inputs' to be {self.next_inputs_type}."
|
||||
raise TypeError(msg)
|
||||
raise ValueError(msg)
|
||||
parsed["next_inputs"] = {self.next_inputs_inner_key: parsed["next_inputs"]}
|
||||
if (
|
||||
parsed["destination"].strip().lower()
|
||||
@@ -190,7 +190,7 @@ class RouterOutputParser(BaseOutputParser[dict[str, str]]):
|
||||
parsed["destination"] = None
|
||||
else:
|
||||
parsed["destination"] = parsed["destination"].strip()
|
||||
return parsed
|
||||
except Exception as e:
|
||||
msg = f"Parsing text\n{text}\n raised following error:\n{e}"
|
||||
raise OutputParserException(msg) from e
|
||||
return parsed
|
||||
|
||||
@@ -86,60 +86,59 @@ def init_chat_model(
|
||||
config_prefix: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
) -> Union[BaseChatModel, _ConfigurableModel]:
|
||||
"""Initialize a ChatModel in a single line using the model's name and provider.
|
||||
"""Initialize a ChatModel from the model name and provider.
|
||||
|
||||
.. note::
|
||||
Must have the integration package corresponding to the model provider installed.
|
||||
You should look at the `provider integration's API reference <https://python.langchain.com/api_reference/reference.html#integrations>`__
|
||||
to see what parameters are supported by the model.
|
||||
**Note:** Must have the integration package corresponding to the model provider
|
||||
installed.
|
||||
|
||||
Args:
|
||||
model: The name of the model, e.g. ``'o3-mini'``, ``'claude-3-5-sonnet-latest'``. You can
|
||||
model: The name of the model, e.g. "o3-mini", "claude-3-5-sonnet-latest". You can
|
||||
also specify model and model provider in a single argument using
|
||||
``'{model_provider}:{model}'`` format, e.g. ``'openai:o1'``.
|
||||
'{model_provider}:{model}' format, e.g. "openai:o1".
|
||||
model_provider: The model provider if not specified as part of model arg (see
|
||||
above). Supported model_provider values and the corresponding integration
|
||||
package are:
|
||||
|
||||
- ``openai`` -> ``langchain-openai``
|
||||
- ``anthropic`` -> ``langchain-anthropic``
|
||||
- ``azure_openai`` -> ``langchain-openai``
|
||||
- ``azure_ai`` -> ``langchain-azure-ai``
|
||||
- ``google_vertexai`` -> ``langchain-google-vertexai``
|
||||
- ``google_genai`` -> ``langchain-google-genai``
|
||||
- ``bedrock`` -> ``langchain-aws``
|
||||
- ``bedrock_converse`` -> ``langchain-aws``
|
||||
- ``cohere`` -> ``langchain-cohere``
|
||||
- ``fireworks`` -> ``langchain-fireworks``
|
||||
- ``together`` -> ``langchain-together``
|
||||
- ``mistralai`` -> ``langchain-mistralai``
|
||||
- ``huggingface`` -> ``langchain-huggingface``
|
||||
- ``groq`` -> ``langchain-groq``
|
||||
- ``ollama`` -> ``langchain-ollama``
|
||||
- ``google_anthropic_vertex`` -> ``langchain-google-vertexai``
|
||||
- ``deepseek`` -> ``langchain-deepseek``
|
||||
- ``ibm`` -> ``langchain-ibm``
|
||||
- ``nvidia`` -> ``langchain-nvidia-ai-endpoints``
|
||||
- ``xai`` -> ``langchain-xai``
|
||||
- ``perplexity`` -> ``langchain-perplexity``
|
||||
- 'openai' -> langchain-openai
|
||||
- 'anthropic' -> langchain-anthropic
|
||||
- 'azure_openai' -> langchain-openai
|
||||
- 'azure_ai' -> langchain-azure-ai
|
||||
- 'google_vertexai' -> langchain-google-vertexai
|
||||
- 'google_genai' -> langchain-google-genai
|
||||
- 'bedrock' -> langchain-aws
|
||||
- 'bedrock_converse' -> langchain-aws
|
||||
- 'cohere' -> langchain-cohere
|
||||
- 'fireworks' -> langchain-fireworks
|
||||
- 'together' -> langchain-together
|
||||
- 'mistralai' -> langchain-mistralai
|
||||
- 'huggingface' -> langchain-huggingface
|
||||
- 'groq' -> langchain-groq
|
||||
- 'ollama' -> langchain-ollama
|
||||
- 'google_anthropic_vertex' -> langchain-google-vertexai
|
||||
- 'deepseek' -> langchain-deepseek
|
||||
- 'ibm' -> langchain-ibm
|
||||
- 'nvidia' -> langchain-nvidia-ai-endpoints
|
||||
- 'xai' -> langchain-xai
|
||||
- 'perplexity' -> langchain-perplexity
|
||||
|
||||
Will attempt to infer model_provider from model if not specified. The
|
||||
following providers will be inferred based on these model prefixes:
|
||||
|
||||
- ``gpt-3...`` | ``gpt-4...`` | ``o1...`` -> ``openai``
|
||||
- ``claude...`` -> ``anthropic``
|
||||
- ``amazon...`` -> ``bedrock``
|
||||
- ``gemini...`` -> ``google_vertexai``
|
||||
- ``command...`` -> ``cohere``
|
||||
- ``accounts/fireworks...`` -> ``fireworks``
|
||||
- ``mistral...`` -> ``mistralai``
|
||||
- ``deepseek...`` -> ``deepseek``
|
||||
- ``grok...`` -> ``xai``
|
||||
- ``sonar...`` -> ``perplexity``
|
||||
configurable_fields: Which model parameters are configurable:
|
||||
- 'gpt-3...' | 'gpt-4...' | 'o1...' -> 'openai'
|
||||
- 'claude...' -> 'anthropic'
|
||||
- 'amazon....' -> 'bedrock'
|
||||
- 'gemini...' -> 'google_vertexai'
|
||||
- 'command...' -> 'cohere'
|
||||
- 'accounts/fireworks...' -> 'fireworks'
|
||||
- 'mistral...' -> 'mistralai'
|
||||
- 'deepseek...' -> 'deepseek'
|
||||
- 'grok...' -> 'xai'
|
||||
- 'sonar...' -> 'perplexity'
|
||||
configurable_fields: Which model parameters are
|
||||
configurable:
|
||||
|
||||
- None: No configurable fields.
|
||||
- ``'any'``: All fields are configurable. **See Security Note below.**
|
||||
- "any": All fields are configurable. *See Security Note below.*
|
||||
- Union[List[str], Tuple[str, ...]]: Specified fields are configurable.
|
||||
|
||||
Fields are assumed to have config_prefix stripped if there is a
|
||||
@@ -147,15 +146,15 @@ def init_chat_model(
|
||||
not specified, then defaults to ``("model", "model_provider")``.
|
||||
|
||||
***Security Note***: Setting ``configurable_fields="any"`` means fields like
|
||||
``api_key``, ``base_url``, etc. can be altered at runtime, potentially redirecting
|
||||
api_key, base_url, etc. can be altered at runtime, potentially redirecting
|
||||
model requests to a different service/user. Make sure that if you're
|
||||
accepting untrusted configurations that you enumerate the
|
||||
``configurable_fields=(...)`` explicitly.
|
||||
|
||||
config_prefix: If ``'config_prefix'`` is a non-empty string then model will be
|
||||
config_prefix: If config_prefix is a non-empty string then model will be
|
||||
configurable at runtime via the
|
||||
``config["configurable"]["{config_prefix}_{param}"]`` keys. If
|
||||
``'config_prefix'`` is an empty string then model will be configurable via
|
||||
config_prefix is an empty string then model will be configurable via
|
||||
``config["configurable"]["{param}"]``.
|
||||
temperature: Model temperature.
|
||||
max_tokens: Max output tokens.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""**Embedding models** are wrappers around embedding models
|
||||
from different APIs and services.
|
||||
|
||||
Embedding models can be LLMs or not.
|
||||
**Embedding models** can be LLMs or not.
|
||||
|
||||
**Class hierarchy:**
|
||||
|
||||
@@ -87,7 +87,7 @@ class HypotheticalDocumentEmbedder:
|
||||
)
|
||||
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H
|
||||
|
||||
return H(*args, **kwargs) # type: ignore[return-value] # noqa: PLE0101
|
||||
return H(*args, **kwargs) # type: ignore[return-value]
|
||||
|
||||
@classmethod
|
||||
def from_llm(cls, *args: Any, **kwargs: Any) -> Any:
|
||||
|
||||
@@ -89,6 +89,7 @@ def _infer_model_and_provider(
|
||||
if provider is None and ":" in model:
|
||||
provider, model_name = _parse_model_string(model)
|
||||
else:
|
||||
provider = provider
|
||||
model_name = model
|
||||
|
||||
if not provider:
|
||||
|
||||
@@ -89,7 +89,7 @@ _warned_about_sha1: bool = False
|
||||
|
||||
def _warn_about_sha1_encoder() -> None:
|
||||
"""Emit a one-time warning about SHA-1 collision weaknesses."""
|
||||
global _warned_about_sha1 # noqa: PLW0603
|
||||
global _warned_about_sha1
|
||||
if not _warned_about_sha1:
|
||||
warnings.warn(
|
||||
"Using default key encoder: SHA-1 is *not* collision-resistant. "
|
||||
@@ -340,7 +340,7 @@ class CacheBackedEmbeddings(Embeddings):
|
||||
"key_encoder must be either 'blake2b', 'sha1', 'sha256', 'sha512' "
|
||||
"or a callable that encodes keys."
|
||||
)
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg)
|
||||
|
||||
document_embedding_store = EncoderBackedStore[str, list[float]](
|
||||
document_embedding_cache,
|
||||
|
||||
@@ -36,8 +36,6 @@ from langchain.evaluation.agents.trajectory_eval_prompt import (
|
||||
)
|
||||
from langchain.evaluation.schema import AgentTrajectoryEvaluator, LLMEvalChain
|
||||
|
||||
_MAX_SCORE = 5
|
||||
|
||||
|
||||
class TrajectoryEval(TypedDict):
|
||||
"""A named tuple containing the score and reasoning for a trajectory."""
|
||||
@@ -88,10 +86,10 @@ class TrajectoryOutputParser(BaseOutputParser):
|
||||
raise OutputParserException(msg)
|
||||
score = int(_score.group(1))
|
||||
# If the score is not in the range 1-5, raise an exception.
|
||||
if not 1 <= score <= _MAX_SCORE:
|
||||
if not 1 <= score <= 5:
|
||||
msg = f"Score is not a digit in the range 1-5: {text}"
|
||||
raise OutputParserException(msg)
|
||||
normalized_score = (score - 1) / (_MAX_SCORE - 1)
|
||||
normalized_score = (score - 1) / 4
|
||||
return TrajectoryEval(score=normalized_score, reasoning=reasoning)
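The normalization above maps the raw 1-5 score linearly onto 0.0-1.0; a quick worked check:

```python
for raw_score in range(1, 6):
    print(raw_score, (raw_score - 1) / 4)
# 1 -> 0.0, 2 -> 0.25, 3 -> 0.5, 4 -> 0.75, 5 -> 1.0
```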
|
||||
|
||||
|
||||
|
||||
@@ -23,10 +23,11 @@ from langchain.schema import RUN_KEY
|
||||
def _import_numpy() -> Any:
|
||||
try:
|
||||
import numpy as np
|
||||
|
||||
return np
|
||||
except ImportError as e:
|
||||
msg = "Could not import numpy, please install with `pip install numpy`."
|
||||
raise ImportError(msg) from e
|
||||
return np
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -74,9 +74,9 @@ class JsonValidityEvaluator(StringEvaluator):
        """
        try:
            parse_json_markdown(prediction, parser=json.loads)
            return {"score": 1}
        except Exception as e:
            return {"score": 0, "reasoning": str(e)}
        return {"score": 1}


class JsonEqualityEvaluator(StringEvaluator):

@@ -70,11 +70,8 @@ class JsonSchemaEvaluator(StringEvaluator):
    def _parse_json(self, node: Any) -> Union[dict, list, None, float, bool, int, str]:
        if isinstance(node, str):
            return parse_json_markdown(node)
        if hasattr(node, "model_json_schema") and callable(node.model_json_schema):
            # Pydantic v2 model
            return node.model_json_schema()
        if hasattr(node, "schema") and callable(node.schema):
            # Pydantic v1 model
            # Pydantic model
            return node.schema()
        return node

@@ -83,9 +80,11 @@ class JsonSchemaEvaluator(StringEvaluator):

        try:
            validate(instance=prediction, schema=schema)
            return {
                "score": True,
            }
        except ValidationError as e:
            return {"score": False, "reasoning": repr(e)}
        return {"score": True}

    @override
    def _evaluate_strings(
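These evaluators are used like any other `StringEvaluator`; a small sketch of the validity check whose return paths the first hunk reorders:

    from langchain.evaluation import JsonValidityEvaluator

    evaluator = JsonValidityEvaluator()
    print(evaluator.evaluate_strings(prediction='{"a": 1}'))  # {'score': 1}
    print(evaluator.evaluate_strings(prediction="not json"))  # {'score': 0, 'reasoning': ...}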
@@ -39,7 +39,7 @@ def set_verbose(
    # have migrated to using `set_verbose()` here.
    langchain.verbose = value

    global _verbose  # noqa: PLW0603
    global _verbose
    _verbose = value

@@ -69,6 +69,7 @@ def get_verbose() -> bool:
    # directing them to use `set_verbose()` when they import `langchain.verbose`.
    old_verbose = langchain.verbose

    global _verbose
    return _verbose or old_verbose

@@ -93,7 +94,7 @@ def set_debug(
    # have migrated to using `set_debug()` here.
    langchain.debug = value

    global _debug  # noqa: PLW0603
    global _debug
    _debug = value

@@ -121,6 +122,7 @@ def get_debug() -> bool:
    # directing them to use `set_debug()` when they import `langchain.debug`.
    old_debug = langchain.debug

    global _debug
    return _debug or old_debug

@@ -145,7 +147,7 @@ def set_llm_cache(value: Optional["BaseCache"]) -> None:
    # once all users have migrated to using `set_llm_cache()` here.
    langchain.llm_cache = value

    global _llm_cache  # noqa: PLW0603
    global _llm_cache
    _llm_cache = value

@@ -177,4 +179,5 @@ def get_llm_cache() -> "BaseCache":
    # to use `set_llm_cache()` when they import `langchain.llm_cache`.
    old_llm_cache = langchain.llm_cache

    global _llm_cache
    return _llm_cache or old_llm_cache
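All three pairs of functions follow the same pattern: the setter writes both the legacy module attribute and the private global, and the getter falls back to the legacy attribute. Typical usage:

    from langchain.globals import get_verbose, set_debug, set_verbose

    set_verbose(True)   # preferred over assigning langchain.verbose directly
    set_debug(False)
    assert get_verbose() is True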
@@ -1,4 +1,4 @@
"""Interface with the `LangChain Hub <https://smith.langchain.com/hub>`__."""
"""Interface with the LangChain Hub."""

from __future__ import annotations
@@ -153,7 +153,7 @@ class SQLRecordManager(RecordManager):
        """Create the database schema."""
        if isinstance(self.engine, AsyncEngine):
            msg = "This method is not supported for async engines."
            raise AssertionError(msg)  # noqa: TRY004
            raise AssertionError(msg)

        Base.metadata.create_all(self.engine)

@@ -162,7 +162,7 @@ class SQLRecordManager(RecordManager):

        if not isinstance(self.engine, AsyncEngine):
            msg = "This method is not supported for sync engines."
            raise AssertionError(msg)  # noqa: TRY004
            raise AssertionError(msg)

        async with self.engine.begin() as session:
            await session.run_sync(Base.metadata.create_all)

@@ -173,7 +173,7 @@ class SQLRecordManager(RecordManager):

        if isinstance(self.session_factory, async_sessionmaker):
            msg = "This method is not supported for async engines."
            raise AssertionError(msg)  # noqa: TRY004
            raise AssertionError(msg)

        session = self.session_factory()
        try:

@@ -187,7 +187,7 @@ class SQLRecordManager(RecordManager):

        if not isinstance(self.session_factory, async_sessionmaker):
            msg = "This method is not supported for sync engines."
            raise AssertionError(msg)  # noqa: TRY004
            raise AssertionError(msg)

        async with self.session_factory() as session:
            yield session

@@ -221,7 +221,7 @@ class SQLRecordManager(RecordManager):
            dt = float(dt)
        if not isinstance(dt, float):
            msg = f"Unexpected type for datetime: {type(dt)}"
            raise AssertionError(msg)  # noqa: TRY004
            raise AssertionError(msg)
        return dt

    async def aget_time(self) -> float:

@@ -254,7 +254,7 @@ class SQLRecordManager(RecordManager):
            dt = float(dt)
        if not isinstance(dt, float):
            msg = f"Unexpected type for datetime: {type(dt)}"
            raise AssertionError(msg)  # noqa: TRY004
            raise AssertionError(msg)
        return dt

    def update(
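For context, `SQLRecordManager` is typically constructed with a namespace and a database URL, after which `create_schema()` runs the sync path guarded by these assertions (the SQLite URL is illustrative):

    from langchain.indexes import SQLRecordManager

    record_manager = SQLRecordManager(
        namespace="my_docs",
        db_url="sqlite:///record_manager_cache.sql",
    )
    record_manager.create_schema()  # raises AssertionError on an async engine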
@@ -128,7 +128,7 @@ class UpstashRedisEntityStore(BaseEntityStore):
            self.redis_client = Redis(url=url, token=token)
        except Exception as exc:
            error_msg = "Upstash Redis instance could not be initiated"
            logger.exception(error_msg)
            logger.error(error_msg)
            raise RuntimeError(error_msg) from exc

        self.session_id = session_id

@@ -237,8 +237,8 @@ class RedisEntityStore(BaseEntityStore):

        try:
            self.redis_client = get_client(redis_url=url, decode_responses=True)
        except redis.exceptions.ConnectionError:
            logger.exception("Redis client could not connect")
        except redis.exceptions.ConnectionError as error:
            logger.error(error)

        self.session_id = session_id
        self.key_prefix = key_prefix
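The behavioural difference these hunks trade on, in miniature: `logger.exception` records the message plus the active traceback, while `logger.error` records only its argument:

    import logging

    logger = logging.getLogger(__name__)

    try:
        raise ConnectionError("redis unreachable")
    except ConnectionError as error:
        logger.exception("Redis client could not connect")  # message + traceback
        logger.error(error)                                 # message only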
@@ -39,7 +39,7 @@ class ModelLaboratory:
                "If you want to initialize with LLMs, use the `from_llms` method "
                "instead (`ModelLaboratory.from_llms(...)`)"
            )
            raise ValueError(msg)  # noqa: TRY004
            raise ValueError(msg)
        if len(chain.input_keys) != 1:
            msg = (
                "Currently only support chains with one input variable, "
@@ -5,8 +5,6 @@ from typing import Any
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.utils import pre_init

_MIN_PARSERS = 2


class CombiningOutputParser(BaseOutputParser[dict[str, Any]]):
    """Combine multiple output parsers into one."""

@@ -21,7 +19,7 @@ class CombiningOutputParser(BaseOutputParser[dict[str, Any]]):
    def validate_parsers(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Validate the parsers."""
        parsers = values["parsers"]
        if len(parsers) < _MIN_PARSERS:
        if len(parsers) < 2:
            msg = "Must have at least two parsers"
            raise ValueError(msg)
        for parser in parsers:
@@ -70,7 +70,7 @@ class OutputFixingParser(BaseOutputParser[T]):
                return self.parser.parse(completion)
            except OutputParserException as e:
                if retries == self.max_retries:
                    raise
                    raise e
                retries += 1
                if self.legacy and hasattr(self.retry_chain, "run"):
                    completion = self.retry_chain.run(

@@ -107,7 +107,7 @@ class OutputFixingParser(BaseOutputParser[T]):
                return await self.parser.aparse(completion)
            except OutputParserException as e:
                if retries == self.max_retries:
                    raise
                    raise e
                retries += 1
                if self.legacy and hasattr(self.retry_chain, "arun"):
                    completion = await self.retry_chain.arun(
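A sketch of how this retry loop is reached in practice (the model choice is illustrative, and an API key is assumed to be configured):

    from pydantic import BaseModel
    from langchain.chat_models import init_chat_model
    from langchain.output_parsers import OutputFixingParser
    from langchain_core.output_parsers import PydanticOutputParser

    class Joke(BaseModel):
        setup: str
        punchline: str

    parser = PydanticOutputParser(pydantic_object=Joke)
    llm = init_chat_model("openai:gpt-4o-mini")
    fixing_parser = OutputFixingParser.from_llm(parser=parser, llm=llm, max_retries=2)
    # A malformed completion triggers the loop above; note the missing brace:
    fixing_parser.parse('{"setup": "Why?", "punchline": "Because."')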
@@ -79,7 +79,7 @@ class PandasDataFrameOutputParser(BaseOutputParser[dict[str, Any]]):
    def parse(self, request: str) -> dict[str, Any]:
        stripped_request_params = None
        splitted_request = request.strip().split(":")
        if len(splitted_request) != 2:  # noqa: PLR2004
        if len(splitted_request) != 2:
            msg = f"Request '{request}' is not correctly formatted. \
Please refer to the format instructions."
            raise OutputParserException(msg)

@@ -127,15 +127,16 @@ class PandasDataFrameOutputParser(BaseOutputParser[dict[str, Any]]):
                        filtered_df[stripped_request_params],
                        request_type,
                    )()
            elif request_type == "column":
                result[request_params] = self.dataframe[request_params]
            elif request_type == "row":
                result[request_params] = self.dataframe.iloc[int(request_params)]
            else:
                result[request_type] = getattr(
                    self.dataframe[request_params],
                    request_type,
                )()
                if request_type == "column":
                    result[request_params] = self.dataframe[request_params]
                elif request_type == "row":
                    result[request_params] = self.dataframe.iloc[int(request_params)]
                else:
                    result[request_type] = getattr(
                        self.dataframe[request_params],
                        request_type,
                    )()
        except (AttributeError, IndexError, KeyError) as e:
            if request_type not in {"column", "row"}:
                msg = f"Unsupported request type '{request_type}'. \
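The `request_type:request_params` strings this parser splits look like the following in use (a toy DataFrame for illustration):

    import pandas as pd
    from langchain.output_parsers import PandasDataFrameOutputParser

    df = pd.DataFrame({"num_legs": [2, 4, 8], "num_wings": [2, 0, 0]})
    parser = PandasDataFrameOutputParser(dataframe=df)
    parser.parse("column:num_legs")  # {'num_legs': <the column as a Series>}
    parser.parse("row:1")            # {'1': <row 1 as a Series>}
    parser.parse("mean:num_legs")    # falls through to getattr(df['num_legs'], 'mean')()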
@@ -104,9 +104,9 @@ class RetryOutputParser(BaseOutputParser[T]):
        while retries <= self.max_retries:
            try:
                return self.parser.parse(completion)
            except OutputParserException:
            except OutputParserException as e:
                if retries == self.max_retries:
                    raise
                    raise e
                retries += 1
                if self.legacy and hasattr(self.retry_chain, "run"):
                    completion = self.retry_chain.run(

@@ -141,7 +141,7 @@ class RetryOutputParser(BaseOutputParser[T]):
                return await self.parser.aparse(completion)
            except OutputParserException as e:
                if retries == self.max_retries:
                    raise
                    raise e
                retries += 1
                if self.legacy and hasattr(self.retry_chain, "arun"):
                    completion = await self.retry_chain.arun(

@@ -232,7 +232,7 @@ class RetryWithErrorOutputParser(BaseOutputParser[T]):
                return self.parser.parse(completion)
            except OutputParserException as e:
                if retries == self.max_retries:
                    raise
                    raise e
                retries += 1
                if self.legacy and hasattr(self.retry_chain, "run"):
                    completion = self.retry_chain.run(

@@ -260,7 +260,7 @@ class RetryWithErrorOutputParser(BaseOutputParser[T]):
                return await self.parser.aparse(completion)
            except OutputParserException as e:
                if retries == self.max_retries:
                    raise
                    raise e
                retries += 1
                if self.legacy and hasattr(self.retry_chain, "arun"):
                    completion = await self.retry_chain.arun(
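On the recurring `raise` vs `raise e` swap: both re-raise the same exception object, but a bare `raise` preserves the original traceback context, which is what the lint rule behind these hunks prefers. In miniature:

    from langchain_core.exceptions import OutputParserException

    try:
        raise OutputParserException("unparsable completion")
    except OutputParserException as e:
        raise       # re-raises with the original traceback intact
        # raise e   # same exception, but the traceback is re-anchored here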
@@ -43,15 +43,7 @@ class YamlOutputParser(BaseOutputParser[T]):

    def get_format_instructions(self) -> str:
        # Copy schema to avoid altering original Pydantic schema.
        if hasattr(self.pydantic_object, "model_json_schema"):
            # Pydantic v2
            schema = dict(self.pydantic_object.model_json_schema().items())
        elif hasattr(self.pydantic_object, "schema"):
            # Pydantic v1
            schema = dict(self.pydantic_object.schema().items())
        else:
            msg = "Pydantic object must have either model_json_schema or schema method"
            raise ValueError(msg)
        schema = dict(self.pydantic_object.schema().items())

        # Remove extraneous fields.
        reduced_schema = schema
@@ -48,7 +48,7 @@ class DocumentCompressorPipeline(BaseDocumentCompressor):
                documents = _transformer.transform_documents(documents)
            else:
                msg = f"Got unexpected transformer type: {_transformer}"
                raise ValueError(msg)  # noqa: TRY004
                raise ValueError(msg)
        return documents

    async def acompress_documents(

@@ -78,5 +78,5 @@ class DocumentCompressorPipeline(BaseDocumentCompressor):
                documents = await _transformer.atransform_documents(documents)
            else:
                msg = f"Got unexpected transformer type: {_transformer}"
                raise ValueError(msg)  # noqa: TRY004
                raise ValueError(msg)
        return documents
@@ -70,8 +70,9 @@ class LLMChainFilter(BaseDocumentCompressor):
            output = output_[self.llm_chain.output_key]
            if self.llm_chain.prompt.output_parser is not None:
                include_doc = self.llm_chain.prompt.output_parser.parse(output)
            elif isinstance(output_, bool):
                include_doc = output_
            else:
                if isinstance(output_, bool):
                    include_doc = output_
            if include_doc:
                filtered_docs.append(doc)

@@ -100,8 +101,9 @@ class LLMChainFilter(BaseDocumentCompressor):
            output = output_[self.llm_chain.output_key]
            if self.llm_chain.prompt.output_parser is not None:
                include_doc = self.llm_chain.prompt.output_parser.parse(output)
            elif isinstance(output_, bool):
                include_doc = output_
            else:
                if isinstance(output_, bool):
                    include_doc = output_
            if include_doc:
                filtered_docs.append(doc)
@@ -116,7 +116,7 @@ class EnsembleRetriever(BaseRetriever):
            result = self.rank_fusion(input, run_manager=run_manager, config=config)
        except Exception as e:
            run_manager.on_retriever_error(e)
            raise
            raise e
        else:
            run_manager.on_retriever_end(
                result,

@@ -157,7 +157,7 @@ class EnsembleRetriever(BaseRetriever):
            )
        except Exception as e:
            await run_manager.on_retriever_error(e)
            raise
            raise e
        else:
            await run_manager.on_retriever_end(
                result,
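For context, an `EnsembleRetriever` fuses the ranked results of several retrievers; `rank_fusion` in the hunk above is where the weights are applied. A sketch with two hypothetical retrievers:

    from langchain.retrievers import EnsembleRetriever

    # keyword_retriever and vector_retriever are assumed to exist,
    # e.g. a BM25 retriever paired with a vector-store retriever.
    ensemble = EnsembleRetriever(
        retrievers=[keyword_retriever, vector_retriever],
        weights=[0.4, 0.6],
    )
    docs = ensemble.invoke("what changed in this release?")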
@@ -558,7 +558,7 @@ def _construct_run_evaluator(
            return run_evaluator_dec(eval_config)
        else:
            msg = f"Unknown evaluator type: {type(eval_config)}"
            raise ValueError(msg)  # noqa: TRY004
            raise ValueError(msg)

    if isinstance(evaluator_, StringEvaluator):
        if evaluator_.requires_reference and reference_key is None:

@@ -668,7 +668,7 @@ def _load_run_evaluators(
                f"Unsupported custom evaluator: {custom_evaluator}."
                f" Expected RunEvaluator or StringEvaluator."
            )
            raise ValueError(msg)  # noqa: TRY004
            raise ValueError(msg)

    return run_evaluators

@@ -1040,7 +1040,7 @@ def _prepare_eval_run(
        )
    except (HTTPError, ValueError, LangSmithError) as e:
        if "already exists " not in str(e):
            raise
            raise e
        uid = uuid.uuid4()
        example_msg = f"""
run_on_dataset(

@@ -1123,9 +1123,9 @@ class _DatasetRunContainer:
                        run_id=None,
                        project_id=self.project.id,
                    )
                except Exception:
                except Exception as e:
                    logger.exception(
                        "Error running batch evaluator %s", repr(evaluator)
                        "Error running batch evaluator %s: %s", repr(evaluator), e
                    )
        return aggregate_feedback
@@ -7,8 +7,8 @@ authors = []
license = { text = "MIT" }
requires-python = ">=3.9, <4.0"
dependencies = [
    "langchain-core<1.0.0,>=0.3.72",
    "langchain-text-splitters<1.0.0,>=0.3.9",
    "langchain-core<1.0.0,>=0.3.66",
    "langchain-text-splitters<1.0.0,>=0.3.8",
    "langsmith>=0.1.17",
    "pydantic<3.0.0,>=2.7.4",
    "SQLAlchemy<3,>=1.4",

@@ -17,7 +17,7 @@ dependencies = [
    "async-timeout<5.0.0,>=4.0.0; python_version < \"3.11\"",
]
name = "langchain"
version = "0.3.27"
version = "0.3.26"
description = "Building applications with LLMs through composability"
readme = "README.md"
@@ -147,9 +147,11 @@ select = [
    "A", # flake8-builtins
    "B", # flake8-bugbear
    "ASYNC", # flake8-async
    "B", # flake8-bugbear
    "C4", # flake8-comprehensions
    "COM", # flake8-commas
    "D", # pydocstyle
    "DOC", # pydoclint
    "DTZ", # flake8-datetimez
    "E", # pycodestyle error
    "EM", # flake8-errmsg

@@ -162,10 +164,9 @@ select = [
    "ICN", # flake8-import-conventions
    "INT", # flake8-gettext
    "ISC", # isort-comprehensions
    "PERF", # flake8-perf
    "PGH", # pygrep-hooks
    "PIE", # flake8-pie
    "PL", # pylint
    "PERF", # flake8-perf
    "PT", # flake8-pytest-style
    "PTH", # flake8-use-pathlib
    "PYI", # flake8-pyi

@@ -174,13 +175,12 @@ select = [
    "RSE", # flake8-rst-docstrings
    "RUF", # ruff
    "S", # flake8-bandit
    "SLF", # flake8-self
    "SLOT", # flake8-slots
    "SIM", # flake8-simplify
    "SLF", # flake8-self
    "T10", # flake8-debugger
    "T20", # flake8-print
    "TID", # flake8-tidy-imports
    "TRY", # tryceratops
    "UP", # pyupgrade
    "W", # pycodestyle warning
    "YTT", # flake8-2020

@@ -197,15 +197,10 @@ ignore = [
    "COM812", # Messes with the formatter
    "ISC001", # Messes with the formatter
    "PERF203", # Rarely useful
    "PLR09", # Too many something (args, statements, etc)
    "S112", # Rarely useful
    "RUF012", # Doesn't play well with Pydantic
    "SLF001", # Private member access
    "UP007", # pyupgrade: non-pep604-annotation-union

    # TODO rules
    "PLC0415", # pylint: import-outside-top-level
    "TRY301", # tryceratops: raise-within-try
]
unfixable = ["B028"] # People should intentionally tune the stacklevel

@@ -218,10 +213,9 @@ pyupgrade.keep-runtime-typing = true

[tool.ruff.lint.extend-per-file-ignores]
"tests/**/*.py" = [
    "S101", # Tests need assertions
    "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes
    "SLF001", # Private member access in tests
    "PLR2004", # Magic value comparisons
    "S101", # Tests need assertions
    "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes
    "SLF001", # Private member access in tests
]
"langchain/chains/constitutional_ai/principles.py" = [
    "E501", # Line too long
@@ -130,7 +130,8 @@ def pytest_collection_modifyitems(
                        pytest.mark.skip(reason=f"Requires pkg: `{pkg}`"),
                    )
                    break
        elif only_extended:
            item.add_marker(
                pytest.mark.skip(reason="Skipping not an extended test."),
            )
        else:
            if only_extended:
                item.add_marker(
                    pytest.mark.skip(reason="Skipping not an extended test."),
                )
@@ -105,7 +105,7 @@ class GenericFakeChatModel(BaseChatModel):
                f"Expected generate to return a ChatResult, "
                f"but got {type(chat_result)} instead."
            )
            raise TypeError(msg)
            raise ValueError(msg)

        message = chat_result.generations[0].message

@@ -114,7 +114,7 @@ class GenericFakeChatModel(BaseChatModel):
                f"Expected invoke to return an AIMessage, "
                f"but got {type(message)} instead."
            )
            raise TypeError(msg)
            raise ValueError(msg)

        content = message.content
@@ -1,4 +1,4 @@
import pytest
import pytest as pytest
from langchain_core.documents import Document

from langchain.retrievers.multi_query import LineListOutputParser, _unique_documents

@@ -10,8 +10,6 @@ class AnyStr(str):
    def __eq__(self, other: object) -> bool:
        return isinstance(other, str)

    __hash__ = str.__hash__


# The code below creates version of pydantic models
# that will work in unit tests with AnyStr as id field
5880  libs/langchain/uv.lock  generated
File diff suppressed because it is too large
@@ -1,6 +0,0 @@
.venv
.github
.git
.mypy_cache
.pytest_cache
Dockerfile
@@ -1,21 +0,0 @@
MIT License

Copyright (c) LangChain, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,101 +0,0 @@
.PHONY: all clean docs_build docs_clean docs_linkcheck api_docs_build api_docs_clean api_docs_linkcheck format lint test tests test_watch integration_tests docker_tests help extended_tests

# Default target executed when no arguments are given to make.
all: help

######################
# TESTING AND COVERAGE
######################

# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/

.EXPORT_ALL_VARIABLES:
UV_FROZEN = true

# Run unit tests and generate a coverage report.
coverage:
	uv run --group test pytest --cov \
		--cov-config=.coveragerc \
		--cov-report xml \
		--cov-report term-missing:skip-covered \
		$(TEST_FILE)

test tests:
	uv run --group test pytest -n auto --disable-socket --allow-unix-socket $(TEST_FILE)

extended_tests:
	uv run --group test pytest --disable-socket --allow-unix-socket --only-extended tests/unit_tests

test_watch:
	uv run --group test ptw --snapshot-update --now . -- -x --disable-socket --allow-unix-socket --disable-warnings tests/unit_tests

test_watch_extended:
	uv run --group test ptw --snapshot-update --now . -- -x --disable-socket --allow-unix-socket --only-extended tests/unit_tests

integration_tests:
	uv run --group test --group test_integration pytest tests/integration_tests

docker_tests:
	docker build -t my-langchain-image:test .
	docker run --rm my-langchain-image:test

check_imports: $(shell find langchain -name '*.py')
	uv run python ./scripts/check_imports.py $^

######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/langchain --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

lint lint_diff lint_package lint_tests:
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && uv run --all-groups mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check --fix $(PYTHON_FILES)

spell_check:
	uv run --all-groups codespell --toml pyproject.toml

spell_fix:
	uv run --all-groups codespell --toml pyproject.toml -w

######################
# HELP
######################

help:
	@echo '===================='
	@echo 'clean - run docs_clean and api_docs_clean'
	@echo 'docs_build - build the documentation'
	@echo 'docs_clean - clean the documentation build artifacts'
	@echo 'docs_linkcheck - run linkchecker on the documentation'
	@echo 'api_docs_build - build the API Reference documentation'
	@echo 'api_docs_clean - clean the API Reference documentation build artifacts'
	@echo 'api_docs_linkcheck - run linkchecker on the API Reference documentation'
	@echo '-- LINTING --'
	@echo 'format - run code formatters'
	@echo 'lint - run linters'
	@echo 'spell_check - run codespell on the project'
	@echo 'spell_fix - run codespell on the project and fix the errors'
	@echo '-- TESTS --'
	@echo 'coverage - run unit tests and generate coverage report'
	@echo 'test - run unit tests'
	@echo 'tests - run unit tests (alias for "make test")'
	@echo 'test TEST_FILE=<test_file> - run all tests in file'
	@echo 'extended_tests - run only extended unit tests'
	@echo 'test_watch - run unit tests in watch mode'
	@echo 'integration_tests - run integration tests'
	@echo 'docker_tests - run unit tests in docker'
	@echo '-- DOCUMENTATION tasks are from the top-level Makefile --'
@@ -1,91 +0,0 @@
# 🦜️🔗 LangChain

⚡ Building applications with LLMs through composability ⚡

[Release Notes](https://github.com/langchain-ai/langchain/releases)
[Lint](https://github.com/langchain-ai/langchain/actions/workflows/lint.yml)
[Test](https://github.com/langchain-ai/langchain/actions/workflows/test.yml)
[Downloads](https://pepy.tech/project/langchain)
[License: MIT](https://opensource.org/licenses/MIT)
[Twitter](https://twitter.com/langchainai)
[Open in Dev Containers](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain)
[Open in GitHub Codespaces](https://codespaces.new/langchain-ai/langchain)
[Star History](https://star-history.com/#langchain-ai/langchain)
[Dependency Status](https://libraries.io/github/langchain-ai/langchain)
[Open Issues](https://github.com/langchain-ai/langchain/issues)

Looking for the JS/TS version? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).

To help you ship LangChain apps to production faster, check out [LangSmith](https://smith.langchain.com).
[LangSmith](https://smith.langchain.com) is a unified developer platform for building, testing, and monitoring LLM applications.
Fill out [this form](https://www.langchain.com/contact-sales) to speak with our sales team.

## Quick Install

`pip install langchain`
or
`pip install langsmith && conda install langchain -c conda-forge`

## 🤔 What is this?

Large language models (LLMs) are emerging as a transformative technology, enabling developers to build applications that they previously could not. However, using these LLMs in isolation is often insufficient for creating a truly powerful app - the real power comes when you can combine them with other sources of computation or knowledge.

This library aims to assist in the development of those types of applications. Common examples of these applications include:

**❓ Question answering with RAG**

- [Documentation](https://python.langchain.com/docs/use_cases/question_answering/)
- End-to-end Example: [Chat LangChain](https://chat.langchain.com) and [repo](https://github.com/langchain-ai/chat-langchain)

**🧱 Extracting structured output**

- [Documentation](https://python.langchain.com/docs/use_cases/extraction/)
- End-to-end Example: [SQL Llama2 Template](https://github.com/langchain-ai/langchain-extract/)

**🤖 Chatbots**

- [Documentation](https://python.langchain.com/docs/use_cases/chatbots)
- End-to-end Example: [Web LangChain (web researcher chatbot)](https://weblangchain.vercel.app) and [repo](https://github.com/langchain-ai/weblangchain)

## 📖 Documentation

Please see [here](https://python.langchain.com) for full documentation on:

- Getting started (installation, setting up the environment, simple examples)
- How-To examples (demos, integrations, helper functions)
- Reference (full API docs)
- Resources (high-level explanation of core concepts)

## 🚀 What can this help with?

There are five main areas that LangChain is designed to help with.
These are, in increasing order of complexity:

**📃 Models and Prompts:**

This includes prompt management, prompt optimization, a generic interface for all LLMs, and common utilities for working with chat models and LLMs.

**🔗 Chains:**

Chains go beyond a single LLM call and involve sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.

**📚 Retrieval Augmented Generation:**

Retrieval Augmented Generation involves specific types of chains that first interact with an external data source to fetch data for use in the generation step. Examples include summarization of long pieces of text and question/answering over specific data sources.

**🤖 Agents:**

Agents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end-to-end agents.

**🧐 Evaluation:**

[BETA] Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. LangChain provides some prompts/chains for assisting in this.

For more information on these concepts, please see our [full documentation](https://python.langchain.com).

## 💁 Contributing

As an open-source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infrastructure, or better documentation.

For detailed information on how to contribute, see the [Contributing Guide](https://python.langchain.com/docs/contributing/).
@@ -1,5 +0,0 @@
-e ../partners/openai
-e ../partners/anthropic
-e ../partners/fireworks
-e ../partners/mistralai
-e ../partners/groq
@@ -1,29 +0,0 @@
"""Main entrypoint into package."""

from importlib import metadata
from typing import Any

try:
    __version__ = metadata.version(__package__)
except metadata.PackageNotFoundError:
    # Case where package metadata is not available.
    __version__ = ""
del metadata  # optional, avoids polluting the results of dir(__package__)


def __getattr__(name: str) -> Any:  # noqa: ANN401
    """Get an attribute from the package."""
    if name == "verbose":
        from langchain.globals import _verbose

        return _verbose
    if name == "debug":
        from langchain.globals import _debug

        return _debug
    if name == "llm_cache":
        from langchain.globals import _llm_cache

        return _llm_cache
    msg = f"Could not find: {name}"
    raise AttributeError(msg)
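The module-level `__getattr__` above is what keeps `langchain.verbose` and friends working lazily after the module body runs:

    import langchain

    print(langchain.verbose)   # resolved on demand from langchain.globals._verbose
    print(langchain.debug)
    print(langchain.llm_cache)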
@@ -1,24 +0,0 @@
"""**Chat Models** are a variation on language models.

While Chat Models use language models under the hood, the interface they expose
is a bit different. Rather than expose a "text in, text out" API, they expose
an interface where "chat messages" are the inputs and outputs.

**Class hierarchy:**

.. code-block::

    BaseLanguageModel --> BaseChatModel --> <name>  # Examples: ChatOpenAI, ChatGooglePalm

**Main helpers:**

.. code-block::

    AIMessage, BaseMessage, HumanMessage
"""  # noqa: E501

from langchain.chat_models.base import init_chat_model

__all__ = [
    "init_chat_model",
]
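The single export kept here, `init_chat_model`, is used roughly as follows (model name and provider are illustrative; an API key is assumed to be configured):

    from langchain.chat_models import init_chat_model

    llm = init_chat_model("gpt-4o-mini", model_provider="openai", temperature=0)
    llm.invoke([("human", "Hello!")])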
Some files were not shown because too many files have changed in this diff.