Merge branch 'master' into sqldocstore_postgres_compat

This commit is contained in:
Alex Lee 2025-03-17 17:09:05 -07:00 committed by GitHub
commit 3daa625420
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
40 changed files with 687 additions and 201 deletions

View File

@ -6,6 +6,7 @@ on:
working-directory: working-directory:
required: true required: true
type: string type: string
description: "From which folder this pipeline executes"
python-version: python-version:
required: true required: true
type: string type: string

View File

@ -12,6 +12,7 @@ on:
working-directory: working-directory:
required: true required: true
type: string type: string
description: "From which folder this pipeline executes"
default: 'libs/langchain' default: 'libs/langchain'
dangerous-nonmaster-release: dangerous-nonmaster-release:
required: false required: false

View File

@ -1,4 +1,3 @@
---
name: CI name: CI
on: on:

View File

@ -1,4 +1,3 @@
---
name: Integration docs lint name: Integration docs lint
on: on:

View File

@ -1,4 +1,3 @@
---
name: CI / cd . / make spell_check name: CI / cd . / make spell_check
on: on:

View File

@ -16,7 +16,7 @@ This need motivates the concept of structured output, where models can be instru
## Recommended usage ## Recommended usage
This pseudo-code illustrates the recommended workflow when using structured output. This pseudocode illustrates the recommended workflow when using structured output.
LangChain provides a method, [`with_structured_output()`](/docs/how_to/structured_output/#the-with_structured_output-method), that automates the process of binding the schema to the [model](/docs/concepts/chat_models/) and parsing the output. LangChain provides a method, [`with_structured_output()`](/docs/how_to/structured_output/#the-with_structured_output-method), that automates the process of binding the schema to the [model](/docs/concepts/chat_models/) and parsing the output.
This helper function is available for all model providers that support structured output. This helper function is available for all model providers that support structured output.
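A minimal sketch of this workflow, assuming an OpenAI-backed chat model and an illustrative `ResponseFormatter` schema (any provider that supports structured output works the same way):

```python
from pydantic import BaseModel, Field

from langchain.chat_models import init_chat_model


class ResponseFormatter(BaseModel):
    """Always use this schema to answer the user."""

    answer: str = Field(description="The answer to the user's question")
    followup_question: str = Field(description="A follow-up question the user could ask")


model = init_chat_model("gpt-4o-mini", model_provider="openai")

# Bind the schema to the model; parsing of the raw output is handled for us.
structured_model = model.with_structured_output(ResponseFormatter)

result = structured_model.invoke("What is the powerhouse of the cell?")
print(result.answer)  # result is a ResponseFormatter instance
```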

View File

@ -30,7 +30,7 @@ You will sometimes hear the term `function calling`. We use this term interchang
## Recommended usage ## Recommended usage
This pseudo-code illustrates the recommended workflow for using tool calling. This pseudocode illustrates the recommended workflow for using tool calling.
Created tools are passed to `.bind_tools()` method as a list. Created tools are passed to `.bind_tools()` method as a list.
This model can be called, as usual. If a tool call is made, model's response will contain the tool call arguments. This model can be called, as usual. If a tool call is made, model's response will contain the tool call arguments.
The tool call arguments can be passed directly to the tool. The tool call arguments can be passed directly to the tool.
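A minimal sketch of that flow, assuming an OpenAI-backed chat model and an illustrative `multiply` tool:

```python
from langchain.chat_models import init_chat_model
from langchain_core.tools import tool


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


model = init_chat_model("gpt-4o-mini", model_provider="openai")

# Created tools are passed to .bind_tools() as a list.
model_with_tools = model.bind_tools([multiply])

ai_msg = model_with_tools.invoke("What is 7 times 6?")
for tool_call in ai_msg.tool_calls:
    # The tool call arguments can be passed directly to the tool.
    print(tool_call["name"], multiply.invoke(tool_call["args"]))
```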

View File

@ -11,7 +11,7 @@
"\n", "\n",
"This covers how to load `HTML` documents into a LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n", "This covers how to load `HTML` documents into a LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
"\n", "\n",
"Parsing HTML files often requires specialized tools. Here we demonstrate parsing via [Unstructured](https://unstructured-io.github.io/unstructured/) and [BeautifulSoup4](https://beautiful-soup-4.readthedocs.io/en/latest/), which can be installed via pip. Head over to the integrations page to find integrations with additional services, such as [Azure AI Document Intelligence](/docs/integrations/document_loaders/azure_document_intelligence) or [FireCrawl](/docs/integrations/document_loaders/firecrawl).\n", "Parsing HTML files often requires specialized tools. Here we demonstrate parsing via [Unstructured](https://docs.unstructured.io) and [BeautifulSoup4](https://beautiful-soup-4.readthedocs.io/en/latest/), which can be installed via pip. Head over to the integrations page to find integrations with additional services, such as [Azure AI Document Intelligence](/docs/integrations/document_loaders/azure_document_intelligence) or [FireCrawl](/docs/integrations/document_loaders/firecrawl).\n",
"\n", "\n",
"## Loading HTML with Unstructured" "## Loading HTML with Unstructured"
] ]
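For orientation, a minimal sketch of the BeautifulSoup4-based path (the file path below is illustrative; `UnstructuredHTMLLoader` is used analogously for the Unstructured-based approach):

```python
# pip install beautifulsoup4 lxml
from langchain_community.document_loaders import BSHTMLLoader

loader = BSHTMLLoader("example.html")
docs = loader.load()

print(docs[0].metadata)            # includes the source path and page title
print(docs[0].page_content[:200])  # extracted text content
```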

View File

@ -81,8 +81,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
] ]
}, },
{ {

View File

@ -82,8 +82,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
] ]
}, },
{ {

View File

@ -404,7 +404,7 @@
"source": [ "source": [
"## Responses API\n", "## Responses API\n",
"\n", "\n",
":::info Requires ``langchain-openai>=0.3.9-rc.1``\n", ":::info Requires ``langchain-openai>=0.3.9``\n",
"\n", "\n",
":::\n", ":::\n",
"\n", "\n",

View File

@ -69,8 +69,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
] ]
}, },
{ {

View File

@ -81,8 +81,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
] ]
}, },
{ {

View File

@ -84,8 +84,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
] ]
}, },
{ {

View File

@ -65,8 +65,8 @@
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de", "id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
], ],
"outputs": [], "outputs": [],
"execution_count": null "execution_count": null

View File

@ -521,9 +521,9 @@
"source": [ "source": [
"## API reference\n", "## API reference\n",
"\n", "\n",
"- [LangChain Docling integration GitHub](https://github.com/DS4SD/docling-langchain)\n", "- [LangChain Docling integration GitHub](https://github.com/docling-project/docling-langchain)\n",
"- [Docling GitHub](https://github.com/DS4SD/docling)\n", "- [Docling GitHub](https://github.com/docling-project/docling)\n",
"- [Docling docs](https://ds4sd.github.io/docling/)" "- [Docling docs](https://docling-project.github.io/docling//)"
] ]
}, },
{ {

View File

@ -68,7 +68,7 @@
"id": "92a22c77f03d43dc", "id": "92a22c77f03d43dc",
"metadata": {}, "metadata": {},
"source": [ "source": [
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability. If you wish to do so, you can set the `LANGCHAIN_TRACING_V2` and `LANGCHAIN_API_KEY` environment variables:" "It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability. If you wish to do so, you can set the `LANGSMITH_TRACING` and `LANGSMITH_API_KEY` environment variables:"
] ]
}, },
{ {
@ -76,8 +76,8 @@
"id": "98d8422ecee77403", "id": "98d8422ecee77403",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
], ],
"outputs": [], "outputs": [],
"execution_count": null "execution_count": null

View File

@ -64,8 +64,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
] ]
}, },
{ {

View File

@ -6,15 +6,15 @@ keywords: [openai]
All functionality related to OpenAI All functionality related to OpenAI
>[OpenAI](https://en.wikipedia.org/wiki/OpenAI) is American artificial intelligence (AI) research laboratory > [OpenAI](https://en.wikipedia.org/wiki/OpenAI) is American artificial intelligence (AI) research laboratory
> consisting of the non-profit `OpenAI Incorporated` > consisting of the non-profit **OpenAI Incorporated**
> and its for-profit subsidiary corporation `OpenAI Limited Partnership`. > and its for-profit subsidiary corporation **OpenAI Limited Partnership**.
> `OpenAI` conducts AI research with the declared intention of promoting and developing a friendly AI. > **OpenAI** conducts AI research with the declared intention of promoting and developing a friendly AI.
> `OpenAI` systems run on an `Azure`-based supercomputing platform from `Microsoft`. > **OpenAI** systems run on an **Azure**-based supercomputing platform from **Microsoft**.
>
>The [OpenAI API](https://platform.openai.com/docs/models) is powered by a diverse set of models with different capabilities and price points. > The [OpenAI API](https://platform.openai.com/docs/models) is powered by a diverse set of models with different capabilities and price points.
> >
>[ChatGPT](https://chat.openai.com) is the Artificial Intelligence (AI) chatbot developed by `OpenAI`. > [ChatGPT](https://chat.openai.com) is the Artificial Intelligence (AI) chatbot developed by `OpenAI`.
## Installation and Setup ## Installation and Setup

View File

@ -99,7 +99,7 @@
"data": { "data": {
"text/plain": [ "text/plain": [
"[Document(page_content='This walkthrough demonstrates how to use an agent optimized for conversation. Other agents are often optimized for using tools to figure out the best response, which is not ideal in a conversational setting where you may want the agent to be able to chat with the user as well.\\n\\nIf we compare it to the standard ReAct agent, the main difference is the prompt. We want it to be much more conversational.\\n\\nfrom langchain.agents import AgentType, Tool, initialize_agent\\n\\nfrom langchain_openai import OpenAI\\n\\nfrom langchain.memory import ConversationBufferMemory\\n\\nfrom langchain_community.utilities import SerpAPIWrapper\\n\\nsearch = SerpAPIWrapper() tools = \\\\[ Tool( name=\"Current Search\", func=search.run, description=\"useful for when you need to answer questions about current events or the current state of the world\", ), \\\\]\\n\\n\\\\\\nllm = OpenAI(temperature=0)\\n\\nUsing LCEL\\n\\nWe will first show how to create this agent using LCEL\\n\\nfrom langchain import hub\\n\\nfrom langchain.agents.format_scratchpad import format_log_to_str\\n\\nfrom langchain.agents.output_parsers import ReActSingleInputOutputParser\\n\\nfrom langchain.tools.render import render_text_description\\n\\nprompt = hub.pull(\"hwchase17/react-chat\")\\n\\nprompt = prompt.partial( tools=render_text_description(tools), tool_names=\", \".join(\\\\[[t.name](http://t.name) for t in tools\\\\]), )\\n\\nllm_with_stop = llm.bind(stop=\\\\[\"\\\\nObservation\"\\\\])\\n\\nagent = ( { \"input\": lambda x: x\\\\[\"input\"\\\\], \"agent_scratchpad\": lambda x: format_log_to_str(x\\\\[\"intermediate_steps\"\\\\]), \"chat_history\": lambda x: x\\\\[\"chat_history\"\\\\], } | prompt | llm_with_stop | ReActSingleInputOutputParser() )\\n\\nfrom langchain.agents import AgentExecutor\\n\\nmemory = ConversationBufferMemory(memory_key=\"chat_history\") agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, memory=memory)\\n\\nagent_executor.invoke({\"input\": \"hi, i am bob\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? No\\nFinal Answer: Hi Bob, nice to meet you! How can I help you today?\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'Hi Bob, nice to meet you! How can I help you today?\\'\\n\\nagent_executor.invoke({\"input\": \"whats my name?\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? No\\nFinal Answer: Your name is Bob.\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'Your name is Bob.\\'\\n\\nagent_executor.invoke({\"input\": \"what are some movies showing 9/21/2023?\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? Yes\\nAction: Current Search\\nAction Input: Movies showing 9/21/2023[\\'September 2023 Movies: The Creator • Dumb Money • Expend4bles • The Kill Room • The Inventor • The Equalizer 3 • PAW Patrol: The Mighty Movie, ...\\'] Do I need to use a tool? 
No\\nFinal Answer: According to current search, some movies showing on 9/21/2023 are The Creator, Dumb Money, Expend4bles, The Kill Room, The Inventor, The Equalizer 3, and PAW Patrol: The Mighty Movie.\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'According to current search, some movies showing on 9/21/2023 are The Creator, Dumb Money, Expend4bles, The Kill Room, The Inventor, The Equalizer 3, and PAW Patrol: The Mighty Movie.\\'\\n\\n\\\\\\nUse the off-the-shelf agent\\n\\nWe can also create this agent using the off-the-shelf agent class\\n\\nagent_executor = initialize_agent( tools, llm, agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory=memory, )\\n\\nUse a chat model\\n\\nWe can also use a chat model here. The main difference here is in the prompts used.\\n\\nfrom langchain import hub\\n\\nfrom langchain_openai import ChatOpenAI\\n\\nprompt = hub.pull(\"hwchase17/react-chat-json\") chat_model = ChatOpenAI(temperature=0, model=\"gpt-4\")\\n\\nprompt = prompt.partial( tools=render_text_description(tools), tool_names=\", \".join(\\\\[[t.name](http://t.name) for t in tools\\\\]), )\\n\\nchat_model_with_stop = chat_model.bind(stop=\\\\[\"\\\\nObservation\"\\\\])\\n\\nfrom langchain.agents.format_scratchpad import format_log_to_messages\\n\\nfrom langchain.agents.output_parsers import JSONAgentOutputParser\\n\\n# We need some extra steering, or the c', metadata={'title': 'Conversational', 'source': 'https://d01.getoutline.com/doc/conversational-B5dBkUgQ4b'}),\n", "[Document(page_content='This walkthrough demonstrates how to use an agent optimized for conversation. Other agents are often optimized for using tools to figure out the best response, which is not ideal in a conversational setting where you may want the agent to be able to chat with the user as well.\\n\\nIf we compare it to the standard ReAct agent, the main difference is the prompt. 
We want it to be much more conversational.\\n\\nfrom langchain.agents import AgentType, Tool, initialize_agent\\n\\nfrom langchain_openai import OpenAI\\n\\nfrom langchain.memory import ConversationBufferMemory\\n\\nfrom langchain_community.utilities import SerpAPIWrapper\\n\\nsearch = SerpAPIWrapper() tools = \\\\[ Tool( name=\"Current Search\", func=search.run, description=\"useful for when you need to answer questions about current events or the current state of the world\", ), \\\\]\\n\\n\\\\\\nllm = OpenAI(temperature=0)\\n\\nUsing LCEL\\n\\nWe will first show how to create this agent using LCEL\\n\\nfrom langchain import hub\\n\\nfrom langchain.agents.format_scratchpad import format_log_to_str\\n\\nfrom langchain.agents.output_parsers import ReActSingleInputOutputParser\\n\\nfrom langchain.tools.render import render_text_description\\n\\nprompt = hub.pull(\"hwchase17/react-chat\")\\n\\nprompt = prompt.partial( tools=render_text_description(tools), tool_names=\", \".join(\\\\[[t.name](http://t.name) for t in tools\\\\]), )\\n\\nllm_with_stop = llm.bind(stop=\\\\[\"\\\\nObservation\"\\\\])\\n\\nagent = ( { \"input\": lambda x: x\\\\[\"input\"\\\\], \"agent_scratchpad\": lambda x: format_log_to_str(x\\\\[\"intermediate_steps\"\\\\]), \"chat_history\": lambda x: x\\\\[\"chat_history\"\\\\], } | prompt | llm_with_stop | ReActSingleInputOutputParser() )\\n\\nfrom langchain.agents import AgentExecutor\\n\\nmemory = ConversationBufferMemory(memory_key=\"chat_history\") agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, memory=memory)\\n\\nagent_executor.invoke({\"input\": \"hi, i am bob\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? No\\nFinal Answer: Hi Bob, nice to meet you! How can I help you today?\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'Hi Bob, nice to meet you! How can I help you today?\\'\\n\\nagent_executor.invoke({\"input\": \"whats my name?\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? No\\nFinal Answer: Your name is Bob.\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'Your name is Bob.\\'\\n\\nagent_executor.invoke({\"input\": \"what are some movies showing 9/21/2023?\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? Yes\\nAction: Current Search\\nAction Input: Movies showing 9/21/2023[\\'September 2023 Movies: The Creator • Dumb Money • Expend4bles • The Kill Room • The Inventor • The Equalizer 3 • PAW Patrol: The Mighty Movie, ...\\'] Do I need to use a tool? No\\nFinal Answer: According to current search, some movies showing on 9/21/2023 are The Creator, Dumb Money, Expend4bles, The Kill Room, The Inventor, The Equalizer 3, and PAW Patrol: The Mighty Movie.\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'According to current search, some movies showing on 9/21/2023 are The Creator, Dumb Money, Expend4bles, The Kill Room, The Inventor, The Equalizer 3, and PAW Patrol: The Mighty Movie.\\'\\n\\n\\\\\\nUse the off-the-shelf agent\\n\\nWe can also create this agent using the off-the-shelf agent class\\n\\nagent_executor = initialize_agent( tools, llm, agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory=memory, )\\n\\nUse a chat model\\n\\nWe can also use a chat model here. 
The main difference here is in the prompts used.\\n\\nfrom langchain import hub\\n\\nfrom langchain_openai import ChatOpenAI\\n\\nprompt = hub.pull(\"hwchase17/react-chat-json\") chat_model = ChatOpenAI(temperature=0, model=\"gpt-4\")\\n\\nprompt = prompt.partial( tools=render_text_description(tools), tool_names=\", \".join(\\\\[[t.name](http://t.name) for t in tools\\\\]), )\\n\\nchat_model_with_stop = chat_model.bind(stop=\\\\[\"\\\\nObservation\"\\\\])\\n\\nfrom langchain.agents.format_scratchpad import format_log_to_messages\\n\\nfrom langchain.agents.output_parsers import JSONAgentOutputParser\\n\\n# We need some extra steering, or the c', metadata={'title': 'Conversational', 'source': 'https://d01.getoutline.com/doc/conversational-B5dBkUgQ4b'}),\n",
" Document(page_content='Quickstart\\n\\nIn this quickstart we\\'ll show you how to:\\n\\nGet setup with LangChain, LangSmith and LangServe\\n\\nUse the most basic and common components of LangChain: prompt templates, models, and output parsers\\n\\nUse LangChain Expression Language, the protocol that LangChain is built on and which facilitates component chaining\\n\\nBuild a simple application with LangChain\\n\\nTrace your application with LangSmith\\n\\nServe your application with LangServe\\n\\nThat\\'s a fair amount to cover! Let\\'s dive in.\\n\\nSetup\\n\\nInstallation\\n\\nTo install LangChain run:\\n\\nPip\\n\\nConda\\n\\npip install langchain\\n\\nFor more details, see our Installation guide.\\n\\nEnvironment\\n\\nUsing LangChain will usually require integrations with one or more model providers, data stores, APIs, etc. For this example, we\\'ll use OpenAI\\'s model APIs.\\n\\nFirst we\\'ll need to install their Python package:\\n\\npip install openai\\n\\nAccessing the API requires an API key, which you can get by creating an account and heading here. Once we have a key we\\'ll want to set it as an environment variable by running:\\n\\nexport OPENAI_API_KEY=\"...\"\\n\\nIf you\\'d prefer not to set an environment variable you can pass the key in directly via the openai_api_key named parameter when initiating the OpenAI LLM class:\\n\\nfrom langchain_openai import ChatOpenAI\\n\\nllm = ChatOpenAI(openai_api_key=\"...\")\\n\\nLangSmith\\n\\nMany of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with LangSmith.\\n\\nNote that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\\n\\nexport LANGCHAIN_TRACING_V2=\"true\" export LANGCHAIN_API_KEY=...\\n\\nLangServe\\n\\nLangServe helps developers deploy LangChain chains as a REST API. You do not need to use LangServe to use LangChain, but in this guide we\\'ll show how you can deploy your app with LangServe.\\n\\nInstall with:\\n\\npip install \"langserve\\\\[all\\\\]\"\\n\\nBuilding with LangChain\\n\\nLangChain provides many modules that can be used to build language model applications. Modules can be used as standalones in simple applications and they can be composed for more complex use cases. Composition is powered by LangChain Expression Language (LCEL), which defines a unified Runnable interface that many modules implement, making it possible to seamlessly chain components.\\n\\nThe simplest and most common chain contains three things:\\n\\nLLM/Chat Model: The language model is the core reasoning engine here. In order to work with LangChain, you need to understand the different types of language models and how to work with them. Prompt Template: This provides instructions to the language model. This controls what the language model outputs, so understanding how to construct prompts and different prompting strategies is crucial. Output Parser: These translate the raw response from the language model to a more workable format, making it easy to use the output downstream. In this guide we\\'ll cover those three components individually, and then go over how to combine them. 
Understanding these concepts will set you up well for being able to use and customize LangChain applications. Most LangChain applications allow you to configure the model and/or the prompt, so knowing how to take advantage of this will be a big enabler.\\n\\nLLM / Chat Model\\n\\nThere are two types of language models:\\n\\nLLM: underlying model takes a string as input and returns a string\\n\\nChatModel: underlying model takes a list of messages as input and returns a message\\n\\nStrings are simple, but what exactly are messages? The base message interface is defined by BaseMessage, which has two required attributes:\\n\\ncontent: The content of the message. Usually a string. role: The entity from which the BaseMessage is coming. LangChain provides several ob', metadata={'title': 'Quick Start', 'source': 'https://d01.getoutline.com/doc/quick-start-jGuGGGOTuL'}),\n", " Document(page_content='Quickstart\\n\\nIn this quickstart we\\'ll show you how to:\\n\\nGet setup with LangChain, LangSmith and LangServe\\n\\nUse the most basic and common components of LangChain: prompt templates, models, and output parsers\\n\\nUse LangChain Expression Language, the protocol that LangChain is built on and which facilitates component chaining\\n\\nBuild a simple application with LangChain\\n\\nTrace your application with LangSmith\\n\\nServe your application with LangServe\\n\\nThat\\'s a fair amount to cover! Let\\'s dive in.\\n\\nSetup\\n\\nInstallation\\n\\nTo install LangChain run:\\n\\nPip\\n\\nConda\\n\\npip install langchain\\n\\nFor more details, see our Installation guide.\\n\\nEnvironment\\n\\nUsing LangChain will usually require integrations with one or more model providers, data stores, APIs, etc. For this example, we\\'ll use OpenAI\\'s model APIs.\\n\\nFirst we\\'ll need to install their Python package:\\n\\npip install openai\\n\\nAccessing the API requires an API key, which you can get by creating an account and heading here. Once we have a key we\\'ll want to set it as an environment variable by running:\\n\\nexport OPENAI_API_KEY=\"...\"\\n\\nIf you\\'d prefer not to set an environment variable you can pass the key in directly via the openai_api_key named parameter when initiating the OpenAI LLM class:\\n\\nfrom langchain_openai import ChatOpenAI\\n\\nllm = ChatOpenAI(openai_api_key=\"...\")\\n\\nLangSmith\\n\\nMany of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with LangSmith.\\n\\nNote that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\\n\\nexport LANGSMITH_TRACING=\"true\" export LANGSMITH_API_KEY=...\\n\\nLangServe\\n\\nLangServe helps developers deploy LangChain chains as a REST API. You do not need to use LangServe to use LangChain, but in this guide we\\'ll show how you can deploy your app with LangServe.\\n\\nInstall with:\\n\\npip install \"langserve\\\\[all\\\\]\"\\n\\nBuilding with LangChain\\n\\nLangChain provides many modules that can be used to build language model applications. Modules can be used as standalones in simple applications and they can be composed for more complex use cases. 
Composition is powered by LangChain Expression Language (LCEL), which defines a unified Runnable interface that many modules implement, making it possible to seamlessly chain components.\\n\\nThe simplest and most common chain contains three things:\\n\\nLLM/Chat Model: The language model is the core reasoning engine here. In order to work with LangChain, you need to understand the different types of language models and how to work with them. Prompt Template: This provides instructions to the language model. This controls what the language model outputs, so understanding how to construct prompts and different prompting strategies is crucial. Output Parser: These translate the raw response from the language model to a more workable format, making it easy to use the output downstream. In this guide we\\'ll cover those three components individually, and then go over how to combine them. Understanding these concepts will set you up well for being able to use and customize LangChain applications. Most LangChain applications allow you to configure the model and/or the prompt, so knowing how to take advantage of this will be a big enabler.\\n\\nLLM / Chat Model\\n\\nThere are two types of language models:\\n\\nLLM: underlying model takes a string as input and returns a string\\n\\nChatModel: underlying model takes a list of messages as input and returns a message\\n\\nStrings are simple, but what exactly are messages? The base message interface is defined by BaseMessage, which has two required attributes:\\n\\ncontent: The content of the message. Usually a string. role: The entity from which the BaseMessage is coming. LangChain provides several ob', metadata={'title': 'Quick Start', 'source': 'https://d01.getoutline.com/doc/quick-start-jGuGGGOTuL'}),\n",
" Document(page_content='This walkthrough showcases using an agent to implement the [ReAct](https://react-lm.github.io/) logic.\\n\\n```javascript\\nfrom langchain.agents import AgentType, initialize_agent, load_tools\\nfrom langchain_openai import OpenAI\\n```\\n\\nFirst, let\\'s load the language model we\\'re going to use to control the agent.\\n\\n```javascript\\nllm = OpenAI(temperature=0)\\n```\\n\\nNext, let\\'s load some tools to use. Note that the llm-math tool uses an LLM, so we need to pass that in.\\n\\n```javascript\\ntools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\\n```\\n\\n## Using LCEL[\\u200b](/docs/modules/agents/agent_types/react#using-lcel \"Direct link to Using LCEL\")\\n\\nWe will first show how to create the agent using LCEL\\n\\n```javascript\\nfrom langchain import hub\\nfrom langchain.agents.format_scratchpad import format_log_to_str\\nfrom langchain.agents.output_parsers import ReActSingleInputOutputParser\\nfrom langchain.tools.render import render_text_description\\n```\\n\\n```javascript\\nprompt = hub.pull(\"hwchase17/react\")\\nprompt = prompt.partial(\\n tools=render_text_description(tools),\\n tool_names=\", \".join([t.name for t in tools]),\\n)\\n```\\n\\n```javascript\\nllm_with_stop = llm.bind(stop=[\"\\\\nObservation\"])\\n```\\n\\n```javascript\\nagent = (\\n {\\n \"input\": lambda x: x[\"input\"],\\n \"agent_scratchpad\": lambda x: format_log_to_str(x[\"intermediate_steps\"]),\\n }\\n | prompt\\n | llm_with_stop\\n | ReActSingleInputOutputParser()\\n)\\n```\\n\\n```javascript\\nfrom langchain.agents import AgentExecutor\\n```\\n\\n```javascript\\nagent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\\n```\\n\\n```javascript\\nagent_executor.invoke(\\n {\\n \"input\": \"Who is Leo DiCaprio\\'s girlfriend? What is her current age raised to the 0.43 power?\"\\n }\\n)\\n```\\n\\n```javascript\\n \\n \\n > Entering new AgentExecutor chain...\\n I need to find out who Leo DiCaprio\\'s girlfriend is and then calculate her age raised to the 0.43 power.\\n Action: Search\\n Action Input: \"Leo DiCaprio girlfriend\"model Vittoria Ceretti I need to find out Vittoria Ceretti\\'s age\\n Action: Search\\n Action Input: \"Vittoria Ceretti age\"25 years I need to calculate 25 raised to the 0.43 power\\n Action: Calculator\\n Action Input: 25^0.43Answer: 3.991298452658078 I now know the final answer\\n Final Answer: Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\\n \\n > Finished chain.\\n\\n\\n\\n\\n\\n {\\'input\\': \"Who is Leo DiCaprio\\'s girlfriend? What is her current age raised to the 0.43 power?\",\\n \\'output\\': \"Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\"}\\n```\\n\\n## Using ZeroShotReactAgent[\\u200b](/docs/modules/agents/agent_types/react#using-zeroshotreactagent \"Direct link to Using ZeroShotReactAgent\")\\n\\nWe will now show how to use the agent with an off-the-shelf agent implementation\\n\\n```javascript\\nagent_executor = initialize_agent(\\n tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\\n)\\n```\\n\\n```javascript\\nagent_executor.invoke(\\n {\\n \"input\": \"Who is Leo DiCaprio\\'s girlfriend? 
What is her current age raised to the 0.43 power?\"\\n }\\n)\\n```\\n\\n```javascript\\n \\n \\n > Entering new AgentExecutor chain...\\n I need to find out who Leo DiCaprio\\'s girlfriend is and then calculate her age raised to the 0.43 power.\\n Action: Search\\n Action Input: \"Leo DiCaprio girlfriend\"\\n Observation: model Vittoria Ceretti\\n Thought: I need to find out Vittoria Ceretti\\'s age\\n Action: Search\\n Action Input: \"Vittoria Ceretti age\"\\n Observation: 25 years\\n Thought: I need to calculate 25 raised to the 0.43 power\\n Action: Calculator\\n Action Input: 25^0.43\\n Observation: Answer: 3.991298452658078\\n Thought: I now know the final answer\\n Final Answer: Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\\n \\n > Finished chain.\\n\\n\\n\\n\\n\\n {\\'input\\': \"Who is L', metadata={'title': 'ReAct', 'source': 'https://d01.getoutline.com/doc/react-d6rxRS1MHk'})]" " Document(page_content='This walkthrough showcases using an agent to implement the [ReAct](https://react-lm.github.io/) logic.\\n\\n```javascript\\nfrom langchain.agents import AgentType, initialize_agent, load_tools\\nfrom langchain_openai import OpenAI\\n```\\n\\nFirst, let\\'s load the language model we\\'re going to use to control the agent.\\n\\n```javascript\\nllm = OpenAI(temperature=0)\\n```\\n\\nNext, let\\'s load some tools to use. Note that the llm-math tool uses an LLM, so we need to pass that in.\\n\\n```javascript\\ntools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\\n```\\n\\n## Using LCEL[\\u200b](/docs/modules/agents/agent_types/react#using-lcel \"Direct link to Using LCEL\")\\n\\nWe will first show how to create the agent using LCEL\\n\\n```javascript\\nfrom langchain import hub\\nfrom langchain.agents.format_scratchpad import format_log_to_str\\nfrom langchain.agents.output_parsers import ReActSingleInputOutputParser\\nfrom langchain.tools.render import render_text_description\\n```\\n\\n```javascript\\nprompt = hub.pull(\"hwchase17/react\")\\nprompt = prompt.partial(\\n tools=render_text_description(tools),\\n tool_names=\", \".join([t.name for t in tools]),\\n)\\n```\\n\\n```javascript\\nllm_with_stop = llm.bind(stop=[\"\\\\nObservation\"])\\n```\\n\\n```javascript\\nagent = (\\n {\\n \"input\": lambda x: x[\"input\"],\\n \"agent_scratchpad\": lambda x: format_log_to_str(x[\"intermediate_steps\"]),\\n }\\n | prompt\\n | llm_with_stop\\n | ReActSingleInputOutputParser()\\n)\\n```\\n\\n```javascript\\nfrom langchain.agents import AgentExecutor\\n```\\n\\n```javascript\\nagent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\\n```\\n\\n```javascript\\nagent_executor.invoke(\\n {\\n \"input\": \"Who is Leo DiCaprio\\'s girlfriend? 
What is her current age raised to the 0.43 power?\"\\n }\\n)\\n```\\n\\n```javascript\\n \\n \\n > Entering new AgentExecutor chain...\\n I need to find out who Leo DiCaprio\\'s girlfriend is and then calculate her age raised to the 0.43 power.\\n Action: Search\\n Action Input: \"Leo DiCaprio girlfriend\"model Vittoria Ceretti I need to find out Vittoria Ceretti\\'s age\\n Action: Search\\n Action Input: \"Vittoria Ceretti age\"25 years I need to calculate 25 raised to the 0.43 power\\n Action: Calculator\\n Action Input: 25^0.43Answer: 3.991298452658078 I now know the final answer\\n Final Answer: Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\\n \\n > Finished chain.\\n\\n\\n\\n\\n\\n {\\'input\\': \"Who is Leo DiCaprio\\'s girlfriend? What is her current age raised to the 0.43 power?\",\\n \\'output\\': \"Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\"}\\n```\\n\\n## Using ZeroShotReactAgent[\\u200b](/docs/modules/agents/agent_types/react#using-zeroshotreactagent \"Direct link to Using ZeroShotReactAgent\")\\n\\nWe will now show how to use the agent with an off-the-shelf agent implementation\\n\\n```javascript\\nagent_executor = initialize_agent(\\n tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\\n)\\n```\\n\\n```javascript\\nagent_executor.invoke(\\n {\\n \"input\": \"Who is Leo DiCaprio\\'s girlfriend? What is her current age raised to the 0.43 power?\"\\n }\\n)\\n```\\n\\n```javascript\\n \\n \\n > Entering new AgentExecutor chain...\\n I need to find out who Leo DiCaprio\\'s girlfriend is and then calculate her age raised to the 0.43 power.\\n Action: Search\\n Action Input: \"Leo DiCaprio girlfriend\"\\n Observation: model Vittoria Ceretti\\n Thought: I need to find out Vittoria Ceretti\\'s age\\n Action: Search\\n Action Input: \"Vittoria Ceretti age\"\\n Observation: 25 years\\n Thought: I need to calculate 25 raised to the 0.43 power\\n Action: Calculator\\n Action Input: 25^0.43\\n Observation: Answer: 3.991298452658078\\n Thought: I now know the final answer\\n Final Answer: Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\\n \\n > Finished chain.\\n\\n\\n\\n\\n\\n {\\'input\\': \"Who is L', metadata={'title': 'ReAct', 'source': 'https://d01.getoutline.com/doc/react-d6rxRS1MHk'})]"
] ]
}, },

View File

@ -66,7 +66,7 @@
"id": "92a22c77f03d43dc", "id": "92a22c77f03d43dc",
"metadata": {}, "metadata": {},
"source": [ "source": [
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability. If you wish to do so, you can set the `LANGCHAIN_TRACING_V2` and `LANGCHAIN_API_KEY` environment variables:" "It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability. If you wish to do so, you can set the `LANGSMITH_TRACING` and `LANGSMITH_API_KEY` environment variables:"
] ]
}, },
{ {
@ -74,8 +74,8 @@
"id": "98d8422ecee77403", "id": "98d8422ecee77403",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
], ],
"outputs": [], "outputs": [],
"execution_count": null "execution_count": null

View File

@ -81,8 +81,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
] ]
}, },
{ {

View File

@ -89,8 +89,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
] ]
}, },
{ {

View File

@ -59,8 +59,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
] ]
}, },
{ {

View File

@ -101,8 +101,8 @@
} }
}, },
"source": [ "source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()"
], ],
"outputs": [], "outputs": [],
"execution_count": 4 "execution_count": 4

View File

@ -59,7 +59,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's load the OpenAI key to the environemnt. If you don't have one you can create an OpenAI account and create a key on this [page](https://platform.openai.com/account/api-keys)." "Let's load the OpenAI key to the environment. If you don't have one you can create an OpenAI account and create a key on this [page](https://platform.openai.com/account/api-keys)."
] ]
}, },
{ {

View File

@ -99,7 +99,7 @@ export const CustomDropdown = ({ selectedOption, options, onSelect, modelType })
* @param {ChatModelTabsProps} props - Component props. * @param {ChatModelTabsProps} props - Component props.
*/ */
export default function ChatModelTabs(props) { export default function ChatModelTabs(props) {
const [selectedModel, setSelectedModel] = useState("groq"); const [selectedModel, setSelectedModel] = useState("openai");
const { const {
overrideParams, overrideParams,
customVarName, customVarName,
@ -108,13 +108,6 @@ export default function ChatModelTabs(props) {
const llmVarName = customVarName ?? "model"; const llmVarName = customVarName ?? "model";
const tabItems = [ const tabItems = [
{
value: "groq",
label: "Groq",
model: "llama3-8b-8192",
apiKeyName: "GROQ_API_KEY",
packageName: "langchain[groq]",
},
{ {
value: "openai", value: "openai",
label: "OpenAI", label: "OpenAI",
@ -156,6 +149,13 @@ ${llmVarName} = AzureChatOpenAI(
apiKeyText: "# Ensure your AWS credentials are configured", apiKeyText: "# Ensure your AWS credentials are configured",
packageName: "langchain[aws]", packageName: "langchain[aws]",
}, },
{
value: "groq",
label: "Groq",
model: "llama3-8b-8192",
apiKeyName: "GROQ_API_KEY",
packageName: "langchain[groq]",
},
{ {
value: "cohere", value: "cohere",
label: "Cohere", label: "Cohere",

View File

@ -226,7 +226,7 @@ class FireCrawlLoader(BaseLoader):
*, *,
api_key: Optional[str] = None, api_key: Optional[str] = None,
api_url: Optional[str] = None, api_url: Optional[str] = None,
mode: Literal["crawl", "scrape", "map"] = "crawl", mode: Literal["crawl", "scrape", "map", "extract"] = "crawl",
params: Optional[dict] = None, params: Optional[dict] = None,
): ):
"""Initialize with API key and url. """Initialize with API key and url.
@ -241,6 +241,7 @@ class FireCrawlLoader(BaseLoader):
Options include "scrape" (single url), Options include "scrape" (single url),
"crawl" (all accessible sub pages), "crawl" (all accessible sub pages),
"map" (returns list of links that are semantically related). "map" (returns list of links that are semantically related).
"extract" (extracts structured data from a page).
params: The parameters to pass to the Firecrawl API. params: The parameters to pass to the Firecrawl API.
Examples include crawlerOptions. Examples include crawlerOptions.
For more details, visit: https://github.com/mendableai/firecrawl-py For more details, visit: https://github.com/mendableai/firecrawl-py
@ -252,9 +253,10 @@ class FireCrawlLoader(BaseLoader):
raise ImportError( raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`" "`firecrawl` package not found, please run `pip install firecrawl-py`"
) )
if mode not in ("crawl", "scrape", "search", "map"): if mode not in ("crawl", "scrape", "search", "map", "extract"):
raise ValueError( raise ValueError(
f"Invalid mode '{mode}'. Allowed: 'crawl', 'scrape', 'search', 'map'." f"""Invalid mode '{mode}'.
Allowed: 'crawl', 'scrape', 'search', 'map', 'extract'."""
) )
if not url: if not url:
@ -284,16 +286,23 @@ class FireCrawlLoader(BaseLoader):
if not self.url: if not self.url:
raise ValueError("URL is required for map mode") raise ValueError("URL is required for map mode")
firecrawl_docs = self.firecrawl.map_url(self.url, params=self.params) firecrawl_docs = self.firecrawl.map_url(self.url, params=self.params)
elif self.mode == "extract":
if not self.url:
raise ValueError("URL is required for extract mode")
firecrawl_docs = [
str(self.firecrawl.extract([self.url], params=self.params))
]
elif self.mode == "search": elif self.mode == "search":
raise ValueError( raise ValueError(
"Search mode is not supported in this version, please downgrade." "Search mode is not supported in this version, please downgrade."
) )
else: else:
raise ValueError( raise ValueError(
f"Invalid mode '{self.mode}'. Allowed: 'crawl', 'scrape', 'map'." f"""Invalid mode '{self.mode}'.
Allowed: 'crawl', 'scrape', 'map', 'extract'."""
) )
for doc in firecrawl_docs: for doc in firecrawl_docs:
if self.mode == "map": if self.mode == "map" or self.mode == "extract":
page_content = doc page_content = doc
metadata = {} metadata = {}
else: else:
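A minimal sketch of the new `extract` mode added above (the API key, prompt, and schema are illustrative):

```python
from langchain_community.document_loaders import FireCrawlLoader

loader = FireCrawlLoader(
    url="https://example.com",
    api_key="...",  # or set FIRECRAWL_API_KEY in the environment
    mode="extract",
    params={
        "prompt": "Extract the page title and a one-sentence summary.",
        "schema": {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "summary": {"type": "string"},
            },
            "required": ["title", "summary"],
        },
    },
)

# In extract mode the structured result comes back as a single Document whose
# page_content is the stringified extraction response (metadata stays empty).
docs = loader.load()
print(docs[0].page_content)
```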

View File

@ -281,12 +281,8 @@ class OpenAIWhisperParser(BaseBlobParser):
raise ImportError( raise ImportError(
"openai package not found, please install it with `pip install openai`" "openai package not found, please install it with `pip install openai`"
) )
try:
from pydub import AudioSegment audio = _get_audio_from_blob(blob)
except ImportError:
raise ImportError(
"pydub package not found, please install it with `pip install pydub`"
)
if is_openai_v1(): if is_openai_v1():
# api_key optional, defaults to `os.environ['OPENAI_API_KEY']` # api_key optional, defaults to `os.environ['OPENAI_API_KEY']`
@ -298,9 +294,6 @@ class OpenAIWhisperParser(BaseBlobParser):
if self.base_url: if self.base_url:
openai.api_base = self.base_url openai.api_base = self.base_url
# Audio file from disk
audio = AudioSegment.from_file(blob.path)
# Define the duration of each chunk in minutes # Define the duration of each chunk in minutes
# Need to meet 25MB size limit for Whisper API # Need to meet 25MB size limit for Whisper API
chunk_duration = 20 chunk_duration = 20
@ -451,13 +444,6 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
def lazy_parse(self, blob: Blob) -> Iterator[Document]: def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Lazily parse the blob.""" """Lazily parse the blob."""
try:
from pydub import AudioSegment
except ImportError:
raise ImportError(
"pydub package not found, please install it with `pip install pydub`"
)
try: try:
import librosa import librosa
except ImportError: except ImportError:
@ -466,8 +452,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
"`pip install librosa`" "`pip install librosa`"
) )
# Audio file from disk audio = _get_audio_from_blob(blob)
audio = AudioSegment.from_file(blob.path)
file_obj = io.BytesIO(audio.export(format="mp3").read()) file_obj = io.BytesIO(audio.export(format="mp3").read())
@ -529,12 +514,8 @@ class YandexSTTParser(BaseBlobParser):
"yandex-speechkit package not found, please install it with " "yandex-speechkit package not found, please install it with "
"`pip install yandex-speechkit`" "`pip install yandex-speechkit`"
) )
try:
from pydub import AudioSegment audio = _get_audio_from_blob(blob)
except ImportError:
raise ImportError(
"pydub package not found, please install it with `pip install pydub`"
)
if self.api_key: if self.api_key:
configure_credentials( configure_credentials(
@ -545,8 +526,6 @@ class YandexSTTParser(BaseBlobParser):
yandex_credentials=creds.YandexCredentials(iam_token=self.iam_token) yandex_credentials=creds.YandexCredentials(iam_token=self.iam_token)
) )
audio = AudioSegment.from_file(blob.path)
model = model_repository.recognition_model() model = model_repository.recognition_model()
model.model = self.model model.model = self.model
@ -645,13 +624,6 @@ class FasterWhisperParser(BaseBlobParser):
def lazy_parse(self, blob: Blob) -> Iterator[Document]: def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Lazily parse the blob.""" """Lazily parse the blob."""
try:
from pydub import AudioSegment
except ImportError:
raise ImportError(
"pydub package not found, please install it with `pip install pydub`"
)
try: try:
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
except ImportError: except ImportError:
@ -660,15 +632,7 @@ class FasterWhisperParser(BaseBlobParser):
"`pip install faster-whisper`" "`pip install faster-whisper`"
) )
# get the audio audio = _get_audio_from_blob(blob)
if isinstance(blob.data, bytes):
# blob contains the audio
audio = AudioSegment.from_file(io.BytesIO(blob.data))
elif blob.data is None and blob.path:
# Audio file from disk
audio = AudioSegment.from_file(blob.path)
else:
raise ValueError("Unable to get audio from blob")
file_obj = io.BytesIO(audio.export(format="mp3").read()) file_obj = io.BytesIO(audio.export(format="mp3").read())
@ -688,3 +652,33 @@ class FasterWhisperParser(BaseBlobParser):
**blob.metadata, **blob.metadata,
}, },
) )
def _get_audio_from_blob(blob: Blob) -> Any:
"""Get audio data from blob.
Args:
blob: Blob object containing the audio data.
Returns:
AudioSegment: Audio data from the blob.
Raises:
ImportError: If the required package `pydub` is not installed.
ValueError: If the audio data is not found in the blob
"""
try:
from pydub import AudioSegment
except ImportError:
raise ImportError(
"pydub package not found, please install it with `pip install pydub`"
)
if isinstance(blob.data, bytes):
audio = AudioSegment.from_file(io.BytesIO(blob.data))
elif blob.data is None and blob.path:
audio = AudioSegment.from_file(blob.path)
else:
raise ValueError("Unable to get audio from blob")
return audio
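A short usage sketch of how this shared helper is exercised through one of the parsers (the file path is illustrative; pydub and ffmpeg are assumed to be installed):

```python
from langchain_community.document_loaders.parsers.audio import OpenAIWhisperParser
from langchain_core.documents.base import Blob

# _get_audio_from_blob resolves the blob to a pydub AudioSegment, from in-memory
# bytes if present, otherwise from the path on disk.
blob = Blob.from_path("meeting.mp3")  # or Blob(data=raw_bytes)

parser = OpenAIWhisperParser()  # reads OPENAI_API_KEY from the environment
for doc in parser.lazy_parse(blob):
    print(doc.page_content)
```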

View File

@ -0,0 +1,100 @@
"""Test FireCrawlLoader."""
import sys
from typing import Generator, List, Tuple
from unittest.mock import MagicMock
import pytest
from langchain_core.documents import Document
from langchain_community.document_loaders import FireCrawlLoader
# Mock the firecrawl module and register it in sys.modules

@pytest.fixture(autouse=True)
def mock_firecrawl() -> Generator[Tuple[MagicMock, MagicMock], None, None]:
"""Mock firecrawl module for all tests."""
mock_module = MagicMock()
mock_client = MagicMock()
# Have the mocked FirecrawlApp class return the mock client
mock_module.FirecrawlApp.return_value = mock_client
# Set the return value of the extract method
response_dict = {
"success": True,
"data": {
"title": "extracted title",
"main contents": "extracted main contents",
},
"status": "completed",
"expiresAt": "2025-03-12T12:42:09.000Z",
}
mock_client.extract.return_value = response_dict
# Insert the mock module into sys.modules
sys.modules["firecrawl"] = mock_module
yield mock_module, mock_client  # yield so tests can access them if needed
# Clean up after the test
if "firecrawl" in sys.modules:
del sys.modules["firecrawl"]
class TestFireCrawlLoader:
"""Test FireCrawlLoader."""
def test_load_extract_mode(
self, mock_firecrawl: Tuple[MagicMock, MagicMock]
) -> List[Document]:
"""Test loading in extract mode."""
# Get the mocked objects from the fixture
_, mock_client = mock_firecrawl
params = {
"prompt": "extract the title and main contents(write your own prompt here)",
"schema": {
"type": "object",
"properties": {
"title": {"type": "string"},
"main contents": {"type": "string"},
},
"required": ["title", "main contents"],
},
"enableWebSearch": False,
"ignoreSitemap": False,
"showSources": False,
"scrapeOptions": {
"formats": ["markdown"],
"onlyMainContent": True,
"headers": {},
"waitFor": 0,
"mobile": False,
"skipTlsVerification": False,
"timeout": 30000,
"removeBase64Images": True,
"blockAds": True,
"proxy": "basic",
},
}
# Create a FireCrawlLoader instance and run it
loader = FireCrawlLoader(
url="https://example.com", api_key="fake-key", mode="extract", params=params
)
docs = list(loader.lazy_load())  # call the lazy_load method
# Verify the results
assert len(docs) == 1
assert isinstance(docs[0].page_content, str)
# Check that the extract method was called with the correct arguments
mock_client.extract.assert_called_once_with(
["https://example.com"], params=params
)
# The response is converted to a string, so check that each field appears in it
assert "extracted title" in docs[0].page_content
assert "extracted main contents" in docs[0].page_content
assert "success" in docs[0].page_content
return docs

View File

@ -750,18 +750,29 @@ class BaseChatOpenAI(BaseChatModel):
) -> Iterator[ChatGenerationChunk]: ) -> Iterator[ChatGenerationChunk]:
kwargs["stream"] = True kwargs["stream"] = True
payload = self._get_request_payload(messages, stop=stop, **kwargs) payload = self._get_request_payload(messages, stop=stop, **kwargs)
context_manager = self.root_client.responses.create(**payload) if self.include_response_headers:
raw_context_manager = self.root_client.with_raw_response.responses.create(
**payload
)
context_manager = raw_context_manager.parse()
headers = {"headers": dict(raw_context_manager.headers)}
else:
context_manager = self.root_client.responses.create(**payload)
headers = {}
original_schema_obj = kwargs.get("response_format") original_schema_obj = kwargs.get("response_format")
with context_manager as response: with context_manager as response:
is_first_chunk = True
for chunk in response: for chunk in response:
metadata = headers if is_first_chunk else {}
if generation_chunk := _convert_responses_chunk_to_generation_chunk( if generation_chunk := _convert_responses_chunk_to_generation_chunk(
chunk, schema=original_schema_obj chunk, schema=original_schema_obj, metadata=metadata
): ):
if run_manager: if run_manager:
run_manager.on_llm_new_token( run_manager.on_llm_new_token(
generation_chunk.text, chunk=generation_chunk generation_chunk.text, chunk=generation_chunk
) )
is_first_chunk = False
yield generation_chunk yield generation_chunk
async def _astream_responses( async def _astream_responses(
@ -773,18 +784,31 @@ class BaseChatOpenAI(BaseChatModel):
) -> AsyncIterator[ChatGenerationChunk]: ) -> AsyncIterator[ChatGenerationChunk]:
kwargs["stream"] = True kwargs["stream"] = True
payload = self._get_request_payload(messages, stop=stop, **kwargs) payload = self._get_request_payload(messages, stop=stop, **kwargs)
context_manager = await self.root_async_client.responses.create(**payload) if self.include_response_headers:
raw_context_manager = (
await self.root_async_client.with_raw_response.responses.create(
**payload
)
)
context_manager = raw_context_manager.parse()
headers = {"headers": dict(raw_context_manager.headers)}
else:
context_manager = await self.root_async_client.responses.create(**payload)
headers = {}
original_schema_obj = kwargs.get("response_format") original_schema_obj = kwargs.get("response_format")
async with context_manager as response: async with context_manager as response:
is_first_chunk = True
async for chunk in response: async for chunk in response:
metadata = headers if is_first_chunk else {}
if generation_chunk := _convert_responses_chunk_to_generation_chunk( if generation_chunk := _convert_responses_chunk_to_generation_chunk(
chunk, schema=original_schema_obj chunk, schema=original_schema_obj, metadata=metadata
): ):
if run_manager: if run_manager:
await run_manager.on_llm_new_token( await run_manager.on_llm_new_token(
generation_chunk.text, chunk=generation_chunk generation_chunk.text, chunk=generation_chunk
) )
is_first_chunk = False
yield generation_chunk yield generation_chunk
def _stream( def _stream(
@ -877,19 +901,26 @@ class BaseChatOpenAI(BaseChatModel):
response = self.root_client.beta.chat.completions.parse(**payload) response = self.root_client.beta.chat.completions.parse(**payload)
except openai.BadRequestError as e: except openai.BadRequestError as e:
_handle_openai_bad_request(e) _handle_openai_bad_request(e)
elif self.include_response_headers:
raw_response = self.client.with_raw_response.create(**payload)
response = raw_response.parse()
generation_info = {"headers": dict(raw_response.headers)}
elif self._use_responses_api(payload): elif self._use_responses_api(payload):
original_schema_obj = kwargs.get("response_format") original_schema_obj = kwargs.get("response_format")
if original_schema_obj and _is_pydantic_class(original_schema_obj): if original_schema_obj and _is_pydantic_class(original_schema_obj):
response = self.root_client.responses.parse(**payload) response = self.root_client.responses.parse(**payload)
else: else:
response = self.root_client.responses.create(**payload) if self.include_response_headers:
raw_response = self.root_client.with_raw_response.responses.create(
**payload
)
response = raw_response.parse()
generation_info = {"headers": dict(raw_response.headers)}
else:
response = self.root_client.responses.create(**payload)
return _construct_lc_result_from_responses_api( return _construct_lc_result_from_responses_api(
response, schema=original_schema_obj response, schema=original_schema_obj, metadata=generation_info
) )
elif self.include_response_headers:
raw_response = self.client.with_raw_response.create(**payload)
response = raw_response.parse()
generation_info = {"headers": dict(raw_response.headers)}
else: else:
response = self.client.create(**payload) response = self.client.create(**payload)
return self._create_chat_result(response, generation_info) return self._create_chat_result(response, generation_info)
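A minimal sketch of what these changes enable (the model name and header key are illustrative; `use_responses_api` is assumed here to be the flag that routes requests through the Responses API):

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="gpt-4o-mini",
    include_response_headers=True,
    use_responses_api=True,
)

# The raw HTTP response headers ride along in the result's metadata; when
# streaming, they are attached to the first chunk only.
msg = llm.invoke("Hello!")
print(msg.response_metadata.get("headers", {}).get("x-request-id"))
```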
@ -1065,20 +1096,28 @@ class BaseChatOpenAI(BaseChatModel):
) )
except openai.BadRequestError as e: except openai.BadRequestError as e:
_handle_openai_bad_request(e) _handle_openai_bad_request(e)
elif self.include_response_headers:
raw_response = await self.async_client.with_raw_response.create(**payload)
response = raw_response.parse()
generation_info = {"headers": dict(raw_response.headers)}
elif self._use_responses_api(payload): elif self._use_responses_api(payload):
original_schema_obj = kwargs.get("response_format") original_schema_obj = kwargs.get("response_format")
if original_schema_obj and _is_pydantic_class(original_schema_obj): if original_schema_obj and _is_pydantic_class(original_schema_obj):
response = await self.root_async_client.responses.parse(**payload) response = await self.root_async_client.responses.parse(**payload)
else: else:
response = await self.root_async_client.responses.create(**payload) if self.include_response_headers:
raw_response = (
await self.root_async_client.with_raw_response.responses.create(
**payload
)
)
response = raw_response.parse()
generation_info = {"headers": dict(raw_response.headers)}
else:
response = await self.root_async_client.responses.create(**payload)
return _construct_lc_result_from_responses_api( return _construct_lc_result_from_responses_api(
response, schema=original_schema_obj response, schema=original_schema_obj, metadata=generation_info
) )
elif self.include_response_headers:
raw_response = await self.async_client.with_raw_response.create(**payload)
response = raw_response.parse()
generation_info = {"headers": dict(raw_response.headers)}
else: else:
response = await self.async_client.create(**payload) response = await self.async_client.create(**payload)
return await run_in_executor( return await run_in_executor(
@ -2834,6 +2873,13 @@ def _use_responses_api(payload: dict) -> bool:
def _construct_responses_api_payload( def _construct_responses_api_payload(
messages: Sequence[BaseMessage], payload: dict messages: Sequence[BaseMessage], payload: dict
) -> dict: ) -> dict:
# Rename legacy parameters
for legacy_token_param in ["max_tokens", "max_completion_tokens"]:
if legacy_token_param in payload:
payload["max_output_tokens"] = payload.pop(legacy_token_param)
if "reasoning_effort" in payload:
payload["reasoning"] = {"effort": payload.pop("reasoning_effort")}
payload["input"] = _construct_responses_api_input(messages) payload["input"] = _construct_responses_api_input(messages)
if tools := payload.pop("tools", None): if tools := payload.pop("tools", None):
new_tools: list = [] new_tools: list = []
@ -2868,17 +2914,23 @@ def _construct_responses_api_payload(
# For pydantic + non-streaming case, we use responses.parse. # For pydantic + non-streaming case, we use responses.parse.
# Otherwise, we use responses.create. # Otherwise, we use responses.create.
strict = payload.pop("strict", None)
if not payload.get("stream") and _is_pydantic_class(schema): if not payload.get("stream") and _is_pydantic_class(schema):
payload["text_format"] = schema payload["text_format"] = schema
else: else:
if _is_pydantic_class(schema): if _is_pydantic_class(schema):
schema_dict = schema.model_json_schema() schema_dict = schema.model_json_schema()
strict = True
else: else:
schema_dict = schema schema_dict = schema
if schema_dict == {"type": "json_object"}: # JSON mode if schema_dict == {"type": "json_object"}: # JSON mode
payload["text"] = {"format": {"type": "json_object"}} payload["text"] = {"format": {"type": "json_object"}}
elif ( elif (
(response_format := _convert_to_openai_response_format(schema_dict)) (
response_format := _convert_to_openai_response_format(
schema_dict, strict=strict
)
)
and (isinstance(response_format, dict)) and (isinstance(response_format, dict))
and (response_format["type"] == "json_schema") and (response_format["type"] == "json_schema")
): ):
@ -2993,7 +3045,9 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
def _construct_lc_result_from_responses_api( def _construct_lc_result_from_responses_api(
response: Response, schema: Optional[Type[_BM]] = None response: Response,
schema: Optional[Type[_BM]] = None,
metadata: Optional[dict] = None,
) -> ChatResult: ) -> ChatResult:
"""Construct ChatResponse from OpenAI Response API response.""" """Construct ChatResponse from OpenAI Response API response."""
if response.error: if response.error:
@ -3014,6 +3068,8 @@ def _construct_lc_result_from_responses_api(
"model", "model",
) )
} }
if metadata:
response_metadata.update(metadata)
# for compatibility with chat completion calls. # for compatibility with chat completion calls.
response_metadata["model_name"] = response_metadata.get("model") response_metadata["model_name"] = response_metadata.get("model")
if response.usage: if response.usage:
@ -3099,17 +3155,21 @@ def _construct_lc_result_from_responses_api(
if ( if (
schema is not None schema is not None
and "parsed" not in additional_kwargs and "parsed" not in additional_kwargs
and response.output_text # tool calls can generate empty output text
and response.text and response.text
and (text_config := response.text.model_dump()) and (text_config := response.text.model_dump())
and (format_ := text_config.get("format", {})) and (format_ := text_config.get("format", {}))
and (format_.get("type") == "json_schema") and (format_.get("type") == "json_schema")
): ):
parsed_dict = json.loads(response.output_text) try:
if schema and _is_pydantic_class(schema): parsed_dict = json.loads(response.output_text)
parsed = schema(**parsed_dict) if schema and _is_pydantic_class(schema):
else: parsed = schema(**parsed_dict)
parsed = parsed_dict else:
additional_kwargs["parsed"] = parsed parsed = parsed_dict
additional_kwargs["parsed"] = parsed
except json.JSONDecodeError:
pass
message = AIMessage( message = AIMessage(
content=content_blocks, content=content_blocks,
id=msg_id, id=msg_id,
@ -3123,12 +3183,15 @@ def _construct_lc_result_from_responses_api(
def _convert_responses_chunk_to_generation_chunk( def _convert_responses_chunk_to_generation_chunk(
chunk: Any, schema: Optional[Type[_BM]] = None chunk: Any, schema: Optional[Type[_BM]] = None, metadata: Optional[dict] = None
) -> Optional[ChatGenerationChunk]: ) -> Optional[ChatGenerationChunk]:
content = [] content = []
tool_call_chunks: list = [] tool_call_chunks: list = []
additional_kwargs: dict = {} additional_kwargs: dict = {}
response_metadata = {} if metadata:
response_metadata = metadata
else:
response_metadata = {}
usage_metadata = None usage_metadata = None
id = None id = None
if chunk.type == "response.output_text.delta": if chunk.type == "response.output_text.delta":
View File
@ -7,12 +7,12 @@ authors = []
license = { text = "MIT" } license = { text = "MIT" }
requires-python = "<4.0,>=3.9" requires-python = "<4.0,>=3.9"
dependencies = [ dependencies = [
"langchain-core<1.0.0,>=0.3.45-rc.1", "langchain-core<1.0.0,>=0.3.45",
"openai<2.0.0,>=1.66.0", "openai<2.0.0,>=1.66.3",
"tiktoken<1,>=0.7", "tiktoken<1,>=0.7",
] ]
name = "langchain-openai" name = "langchain-openai"
version = "0.3.9-rc.1" version = "0.3.9"
description = "An integration package connecting OpenAI and LangChain" description = "An integration package connecting OpenAI and LangChain"
readme = "README.md" readme = "README.md"
View File
@ -22,7 +22,7 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests):
def chat_model_params(self) -> dict: def chat_model_params(self) -> dict:
return { return {
"deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"], "deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
"model": "gpt-4o", "model": "gpt-4o-mini",
"openai_api_version": OPENAI_API_VERSION, "openai_api_version": OPENAI_API_VERSION,
"azure_endpoint": OPENAI_API_BASE, "azure_endpoint": OPENAI_API_BASE,
} }
View File
@ -31,6 +31,8 @@ from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI from langchain_openai import ChatOpenAI
from tests.unit_tests.fake.callbacks import FakeCallbackHandler from tests.unit_tests.fake.callbacks import FakeCallbackHandler
MAX_TOKEN_COUNT = 16
@pytest.mark.scheduled @pytest.mark.scheduled
def test_chat_openai() -> None: def test_chat_openai() -> None:
@ -44,7 +46,7 @@ def test_chat_openai() -> None:
max_retries=3, max_retries=3,
http_client=None, http_client=None,
n=1, n=1,
max_completion_tokens=10, max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg]
default_headers=None, default_headers=None,
default_query=None, default_query=None,
) )
@ -62,20 +64,21 @@ def test_chat_openai_model() -> None:
assert chat.model_name == "bar" assert chat.model_name == "bar"
def test_chat_openai_system_message() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
def test_chat_openai_system_message(use_responses_api: bool) -> None:
"""Test ChatOpenAI wrapper with system message.""" """Test ChatOpenAI wrapper with system message."""
chat = ChatOpenAI(max_completion_tokens=10) chat = ChatOpenAI(use_responses_api=use_responses_api, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
system_message = SystemMessage(content="You are to chat with the user.") system_message = SystemMessage(content="You are to chat with the user.")
human_message = HumanMessage(content="Hello") human_message = HumanMessage(content="Hello")
response = chat.invoke([system_message, human_message]) response = chat.invoke([system_message, human_message])
assert isinstance(response, BaseMessage) assert isinstance(response, BaseMessage)
assert isinstance(response.content, str) assert isinstance(response.text(), str)
@pytest.mark.scheduled @pytest.mark.scheduled
def test_chat_openai_generate() -> None: def test_chat_openai_generate() -> None:
"""Test ChatOpenAI wrapper with generate.""" """Test ChatOpenAI wrapper with generate."""
chat = ChatOpenAI(max_completion_tokens=10, n=2) chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, n=2) # type: ignore[call-arg]
message = HumanMessage(content="Hello") message = HumanMessage(content="Hello")
response = chat.generate([[message], [message]]) response = chat.generate([[message], [message]])
assert isinstance(response, LLMResult) assert isinstance(response, LLMResult)
@ -92,7 +95,7 @@ def test_chat_openai_generate() -> None:
@pytest.mark.scheduled @pytest.mark.scheduled
def test_chat_openai_multiple_completions() -> None: def test_chat_openai_multiple_completions() -> None:
"""Test ChatOpenAI wrapper with multiple completions.""" """Test ChatOpenAI wrapper with multiple completions."""
chat = ChatOpenAI(max_completion_tokens=10, n=5) chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, n=5) # type: ignore[call-arg]
message = HumanMessage(content="Hello") message = HumanMessage(content="Hello")
response = chat._generate([message]) response = chat._generate([message])
assert isinstance(response, ChatResult) assert isinstance(response, ChatResult)
@ -103,16 +106,18 @@ def test_chat_openai_multiple_completions() -> None:
@pytest.mark.scheduled @pytest.mark.scheduled
def test_chat_openai_streaming() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
def test_chat_openai_streaming(use_responses_api: bool) -> None:
"""Test that streaming correctly invokes on_llm_new_token callback.""" """Test that streaming correctly invokes on_llm_new_token callback."""
callback_handler = FakeCallbackHandler() callback_handler = FakeCallbackHandler()
callback_manager = CallbackManager([callback_handler]) callback_manager = CallbackManager([callback_handler])
chat = ChatOpenAI( chat = ChatOpenAI(
max_completion_tokens=10, max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg]
streaming=True, streaming=True,
temperature=0, temperature=0,
callback_manager=callback_manager, callback_manager=callback_manager,
verbose=True, verbose=True,
use_responses_api=use_responses_api,
) )
message = HumanMessage(content="Hello") message = HumanMessage(content="Hello")
response = chat.invoke([message]) response = chat.invoke([message])
@ -133,9 +138,7 @@ def test_chat_openai_streaming_generation_info() -> None:
callback = _FakeCallback() callback = _FakeCallback()
callback_manager = CallbackManager([callback]) callback_manager = CallbackManager([callback])
chat = ChatOpenAI( chat = ChatOpenAI(max_tokens=2, temperature=0, callback_manager=callback_manager) # type: ignore[call-arg]
max_completion_tokens=2, temperature=0, callback_manager=callback_manager
)
list(chat.stream("hi")) list(chat.stream("hi"))
generation = callback.saved_things["generation"] generation = callback.saved_things["generation"]
# `Hello!` is two tokens, assert that that is what is returned # `Hello!` is two tokens, assert that that is what is returned
@ -144,7 +147,7 @@ def test_chat_openai_streaming_generation_info() -> None:
def test_chat_openai_llm_output_contains_model_name() -> None: def test_chat_openai_llm_output_contains_model_name() -> None:
"""Test llm_output contains model_name.""" """Test llm_output contains model_name."""
chat = ChatOpenAI(max_completion_tokens=10) chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
message = HumanMessage(content="Hello") message = HumanMessage(content="Hello")
llm_result = chat.generate([[message]]) llm_result = chat.generate([[message]])
assert llm_result.llm_output is not None assert llm_result.llm_output is not None
@ -153,7 +156,7 @@ def test_chat_openai_llm_output_contains_model_name() -> None:
def test_chat_openai_streaming_llm_output_contains_model_name() -> None: def test_chat_openai_streaming_llm_output_contains_model_name() -> None:
"""Test llm_output contains model_name.""" """Test llm_output contains model_name."""
chat = ChatOpenAI(max_completion_tokens=10, streaming=True) chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, streaming=True) # type: ignore[call-arg]
message = HumanMessage(content="Hello") message = HumanMessage(content="Hello")
llm_result = chat.generate([[message]]) llm_result = chat.generate([[message]])
assert llm_result.llm_output is not None assert llm_result.llm_output is not None
@ -163,13 +166,13 @@ def test_chat_openai_streaming_llm_output_contains_model_name() -> None:
def test_chat_openai_invalid_streaming_params() -> None: def test_chat_openai_invalid_streaming_params() -> None:
"""Test that streaming correctly invokes on_llm_new_token callback.""" """Test that streaming correctly invokes on_llm_new_token callback."""
with pytest.raises(ValueError): with pytest.raises(ValueError):
ChatOpenAI(max_completion_tokens=10, streaming=True, temperature=0, n=5) ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, streaming=True, temperature=0, n=5) # type: ignore[call-arg]
@pytest.mark.scheduled @pytest.mark.scheduled
async def test_async_chat_openai() -> None: async def test_async_chat_openai() -> None:
"""Test async generation.""" """Test async generation."""
chat = ChatOpenAI(max_completion_tokens=10, n=2) chat = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, n=2) # type: ignore[call-arg]
message = HumanMessage(content="Hello") message = HumanMessage(content="Hello")
response = await chat.agenerate([[message], [message]]) response = await chat.agenerate([[message], [message]])
assert isinstance(response, LLMResult) assert isinstance(response, LLMResult)
@ -189,7 +192,7 @@ async def test_async_chat_openai_streaming() -> None:
callback_handler = FakeCallbackHandler() callback_handler = FakeCallbackHandler()
callback_manager = CallbackManager([callback_handler]) callback_manager = CallbackManager([callback_handler])
chat = ChatOpenAI( chat = ChatOpenAI(
max_completion_tokens=10, max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg]
streaming=True, streaming=True,
temperature=0, temperature=0,
callback_manager=callback_manager, callback_manager=callback_manager,
@ -221,7 +224,7 @@ async def test_async_chat_openai_bind_functions() -> None:
default=None, title="Fav Food", description="The person's favorite food" default=None, title="Fav Food", description="The person's favorite food"
) )
chat = ChatOpenAI(max_completion_tokens=30, n=1, streaming=True).bind_functions( chat = ChatOpenAI(max_tokens=30, n=1, streaming=True).bind_functions( # type: ignore[call-arg]
functions=[Person], function_call="Person" functions=[Person], function_call="Person"
) )
@ -243,7 +246,7 @@ async def test_async_chat_openai_bind_functions() -> None:
@pytest.mark.scheduled @pytest.mark.scheduled
def test_openai_streaming() -> None: def test_openai_streaming() -> None:
"""Test streaming tokens from OpenAI.""" """Test streaming tokens from OpenAI."""
llm = ChatOpenAI(max_completion_tokens=10) llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
for token in llm.stream("I'm Pickle Rick"): for token in llm.stream("I'm Pickle Rick"):
assert isinstance(token.content, str) assert isinstance(token.content, str)
@ -252,7 +255,7 @@ def test_openai_streaming() -> None:
@pytest.mark.scheduled @pytest.mark.scheduled
async def test_openai_astream() -> None: async def test_openai_astream() -> None:
"""Test streaming tokens from OpenAI.""" """Test streaming tokens from OpenAI."""
llm = ChatOpenAI(max_completion_tokens=10) llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
async for token in llm.astream("I'm Pickle Rick"): async for token in llm.astream("I'm Pickle Rick"):
assert isinstance(token.content, str) assert isinstance(token.content, str)
@ -261,7 +264,7 @@ async def test_openai_astream() -> None:
@pytest.mark.scheduled @pytest.mark.scheduled
async def test_openai_abatch() -> None: async def test_openai_abatch() -> None:
"""Test streaming tokens from ChatOpenAI.""" """Test streaming tokens from ChatOpenAI."""
llm = ChatOpenAI(max_completion_tokens=10) llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
for token in result: for token in result:
@ -269,21 +272,22 @@ async def test_openai_abatch() -> None:
@pytest.mark.scheduled @pytest.mark.scheduled
async def test_openai_abatch_tags() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
async def test_openai_abatch_tags(use_responses_api: bool) -> None:
"""Test batch tokens from ChatOpenAI.""" """Test batch tokens from ChatOpenAI."""
llm = ChatOpenAI(max_completion_tokens=10) llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT, use_responses_api=use_responses_api) # type: ignore[call-arg]
result = await llm.abatch( result = await llm.abatch(
["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]}
) )
for token in result: for token in result:
assert isinstance(token.content, str) assert isinstance(token.text(), str)
@pytest.mark.scheduled @pytest.mark.scheduled
def test_openai_batch() -> None: def test_openai_batch() -> None:
"""Test batch tokens from ChatOpenAI.""" """Test batch tokens from ChatOpenAI."""
llm = ChatOpenAI(max_completion_tokens=10) llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])
for token in result: for token in result:
@ -293,7 +297,7 @@ def test_openai_batch() -> None:
@pytest.mark.scheduled @pytest.mark.scheduled
async def test_openai_ainvoke() -> None: async def test_openai_ainvoke() -> None:
"""Test invoke tokens from ChatOpenAI.""" """Test invoke tokens from ChatOpenAI."""
llm = ChatOpenAI(max_completion_tokens=10) llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
assert isinstance(result.content, str) assert isinstance(result.content, str)
@ -302,7 +306,7 @@ async def test_openai_ainvoke() -> None:
@pytest.mark.scheduled @pytest.mark.scheduled
def test_openai_invoke() -> None: def test_openai_invoke() -> None:
"""Test invoke tokens from ChatOpenAI.""" """Test invoke tokens from ChatOpenAI."""
llm = ChatOpenAI(max_completion_tokens=10) llm = ChatOpenAI(max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"]))
assert isinstance(result.content, str) assert isinstance(result.content, str)
@ -387,7 +391,7 @@ async def test_astream() -> None:
assert chunks_with_token_counts == 0 assert chunks_with_token_counts == 0
assert full.usage_metadata is None assert full.usage_metadata is None
llm = ChatOpenAI(temperature=0, max_completion_tokens=5) llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
await _test_stream(llm.astream("Hello"), expect_usage=False) await _test_stream(llm.astream("Hello"), expect_usage=False)
await _test_stream( await _test_stream(
llm.astream("Hello", stream_options={"include_usage": True}), expect_usage=True llm.astream("Hello", stream_options={"include_usage": True}), expect_usage=True
@ -395,7 +399,7 @@ async def test_astream() -> None:
await _test_stream(llm.astream("Hello", stream_usage=True), expect_usage=True) await _test_stream(llm.astream("Hello", stream_usage=True), expect_usage=True)
llm = ChatOpenAI( llm = ChatOpenAI(
temperature=0, temperature=0,
max_completion_tokens=5, max_tokens=MAX_TOKEN_COUNT, # type: ignore[call-arg]
model_kwargs={"stream_options": {"include_usage": True}}, model_kwargs={"stream_options": {"include_usage": True}},
) )
await _test_stream(llm.astream("Hello"), expect_usage=True) await _test_stream(llm.astream("Hello"), expect_usage=True)
@ -403,7 +407,7 @@ async def test_astream() -> None:
llm.astream("Hello", stream_options={"include_usage": False}), llm.astream("Hello", stream_options={"include_usage": False}),
expect_usage=False, expect_usage=False,
) )
llm = ChatOpenAI(temperature=0, max_completion_tokens=5, stream_usage=True) llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT, stream_usage=True) # type: ignore[call-arg]
await _test_stream(llm.astream("Hello"), expect_usage=True) await _test_stream(llm.astream("Hello"), expect_usage=True)
await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False) await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False)
@ -572,9 +576,12 @@ def test_tool_use() -> None:
llm_with_tool.invoke(msgs) llm_with_tool.invoke(msgs)
def test_manual_tool_call_msg() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
def test_manual_tool_call_msg(use_responses_api: bool) -> None:
"""Test passing in manually construct tool call message.""" """Test passing in manually construct tool call message."""
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) llm = ChatOpenAI(
model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api
)
llm_with_tool = llm.bind_tools(tools=[GenerateUsername]) llm_with_tool = llm.bind_tools(tools=[GenerateUsername])
msgs: List = [ msgs: List = [
HumanMessage("Sally has green hair, what would her username be?"), HumanMessage("Sally has green hair, what would her username be?"),
@ -615,9 +622,12 @@ def test_manual_tool_call_msg() -> None:
llm_with_tool.invoke(msgs) llm_with_tool.invoke(msgs)
def test_bind_tools_tool_choice() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
def test_bind_tools_tool_choice(use_responses_api: bool) -> None:
"""Test passing in manually construct tool call message.""" """Test passing in manually construct tool call message."""
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) llm = ChatOpenAI(
model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api
)
for tool_choice in ("any", "required"): for tool_choice in ("any", "required"):
llm_with_tools = llm.bind_tools( llm_with_tools = llm.bind_tools(
tools=[GenerateUsername, MakeASandwich], tool_choice=tool_choice tools=[GenerateUsername, MakeASandwich], tool_choice=tool_choice
@ -677,11 +687,14 @@ def test_openai_proxy() -> None:
assert proxy.port == 8080 assert proxy.port == 8080
def test_openai_response_headers() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
def test_openai_response_headers(use_responses_api: bool) -> None:
"""Test ChatOpenAI response headers.""" """Test ChatOpenAI response headers."""
chat_openai = ChatOpenAI(include_response_headers=True) chat_openai = ChatOpenAI(
include_response_headers=True, use_responses_api=use_responses_api
)
query = "I'm Pickle Rick" query = "I'm Pickle Rick"
result = chat_openai.invoke(query, max_completion_tokens=10) result = chat_openai.invoke(query, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
headers = result.response_metadata["headers"] headers = result.response_metadata["headers"]
assert headers assert headers
assert isinstance(headers, dict) assert isinstance(headers, dict)
@ -689,7 +702,7 @@ def test_openai_response_headers() -> None:
# Stream # Stream
full: Optional[BaseMessageChunk] = None full: Optional[BaseMessageChunk] = None
for chunk in chat_openai.stream(query, max_completion_tokens=10): for chunk in chat_openai.stream(query, max_tokens=MAX_TOKEN_COUNT): # type: ignore[call-arg]
full = chunk if full is None else full + chunk full = chunk if full is None else full + chunk
assert isinstance(full, AIMessage) assert isinstance(full, AIMessage)
headers = full.response_metadata["headers"] headers = full.response_metadata["headers"]
@ -698,11 +711,14 @@ def test_openai_response_headers() -> None:
assert "content-type" in headers assert "content-type" in headers
async def test_openai_response_headers_async() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
async def test_openai_response_headers_async(use_responses_api: bool) -> None:
"""Test ChatOpenAI response headers.""" """Test ChatOpenAI response headers."""
chat_openai = ChatOpenAI(include_response_headers=True) chat_openai = ChatOpenAI(
include_response_headers=True, use_responses_api=use_responses_api
)
query = "I'm Pickle Rick" query = "I'm Pickle Rick"
result = await chat_openai.ainvoke(query, max_completion_tokens=10) result = await chat_openai.ainvoke(query, max_tokens=MAX_TOKEN_COUNT) # type: ignore[call-arg]
headers = result.response_metadata["headers"] headers = result.response_metadata["headers"]
assert headers assert headers
assert isinstance(headers, dict) assert isinstance(headers, dict)
@ -710,7 +726,7 @@ async def test_openai_response_headers_async() -> None:
# Stream # Stream
full: Optional[BaseMessageChunk] = None full: Optional[BaseMessageChunk] = None
async for chunk in chat_openai.astream(query, max_completion_tokens=10): async for chunk in chat_openai.astream(query, max_tokens=MAX_TOKEN_COUNT): # type: ignore[call-arg]
full = chunk if full is None else full + chunk full = chunk if full is None else full + chunk
assert isinstance(full, AIMessage) assert isinstance(full, AIMessage)
headers = full.response_metadata["headers"] headers = full.response_metadata["headers"]
@ -795,7 +811,8 @@ def test_image_token_counting_png() -> None:
assert expected == actual assert expected == actual
def test_tool_calling_strict() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
def test_tool_calling_strict(use_responses_api: bool) -> None:
"""Test tool calling with strict=True.""" """Test tool calling with strict=True."""
class magic_function(BaseModel): class magic_function(BaseModel):
@ -803,7 +820,9 @@ def test_tool_calling_strict() -> None:
input: int input: int
model = ChatOpenAI(model="gpt-4o", temperature=0) model = ChatOpenAI(
model="gpt-4o", temperature=0, use_responses_api=use_responses_api
)
model_with_tools = model.bind_tools([magic_function], strict=True) model_with_tools = model.bind_tools([magic_function], strict=True)
# invalid_magic_function adds metadata to schema that isn't supported by OpenAI. # invalid_magic_function adds metadata to schema that isn't supported by OpenAI.
@ -832,19 +851,22 @@ def test_tool_calling_strict() -> None:
next(model_with_invalid_tool_schema.stream(query)) next(model_with_invalid_tool_schema.stream(query))
@pytest.mark.parametrize("use_responses_api", [False, True])
@pytest.mark.parametrize( @pytest.mark.parametrize(
("model", "method"), ("model", "method"),
[("gpt-4o", "function_calling"), ("gpt-4o-2024-08-06", "json_schema")], [("gpt-4o", "function_calling"), ("gpt-4o-2024-08-06", "json_schema")],
) )
def test_structured_output_strict( def test_structured_output_strict(
model: str, method: Literal["function_calling", "json_schema"] model: str,
method: Literal["function_calling", "json_schema"],
use_responses_api: bool,
) -> None: ) -> None:
"""Test to verify structured output with strict=True.""" """Test to verify structured output with strict=True."""
from pydantic import BaseModel as BaseModelProper from pydantic import BaseModel as BaseModelProper
from pydantic import Field as FieldProper from pydantic import Field as FieldProper
llm = ChatOpenAI(model=model) llm = ChatOpenAI(model=model, use_responses_api=use_responses_api)
class Joke(BaseModelProper): class Joke(BaseModelProper):
"""Joke to tell user.""" """Joke to tell user."""
@ -898,15 +920,16 @@ def test_structured_output_strict(
next(chat.stream("Tell me a joke about cats.")) next(chat.stream("Tell me a joke about cats."))
@pytest.mark.parametrize("use_responses_api", [False, True])
@pytest.mark.parametrize(("model", "method"), [("gpt-4o-2024-08-06", "json_schema")]) @pytest.mark.parametrize(("model", "method"), [("gpt-4o-2024-08-06", "json_schema")])
def test_nested_structured_output_strict( def test_nested_structured_output_strict(
model: str, method: Literal["json_schema"] model: str, method: Literal["json_schema"], use_responses_api: bool
) -> None: ) -> None:
"""Test to verify structured output with strict=True for nested object.""" """Test to verify structured output with strict=True for nested object."""
from typing import TypedDict from typing import TypedDict
llm = ChatOpenAI(model=model, temperature=0) llm = ChatOpenAI(model=model, temperature=0, use_responses_api=use_responses_api)
class SelfEvaluation(TypedDict): class SelfEvaluation(TypedDict):
score: int score: int
@ -1124,12 +1147,20 @@ def test_prediction_tokens() -> None:
assert output_token_details["rejected_prediction_tokens"] > 0 assert output_token_details["rejected_prediction_tokens"] > 0
def test_stream_o1() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
list(ChatOpenAI(model="o1-mini").stream("how are you")) def test_stream_o_series(use_responses_api: bool) -> None:
list(
ChatOpenAI(model="o3-mini", use_responses_api=use_responses_api).stream(
"how are you"
)
)
async def test_astream_o1() -> None: @pytest.mark.parametrize("use_responses_api", [False, True])
async for _ in ChatOpenAI(model="o1-mini").astream("how are you"): async def test_astream_o_series(use_responses_api: bool) -> None:
async for _ in ChatOpenAI(
model="o3-mini", use_responses_api=use_responses_api
).astream("how are you"):
pass pass
@ -1171,21 +1202,27 @@ async def test_astream_response_format() -> None:
assert parsed.response == parsed_content["response"] assert parsed.response == parsed_content["response"]
@pytest.mark.parametrize("use_responses_api", [False, True])
@pytest.mark.parametrize("use_max_completion_tokens", [True, False]) @pytest.mark.parametrize("use_max_completion_tokens", [True, False])
def test_o1(use_max_completion_tokens: bool) -> None: def test_o1(use_max_completion_tokens: bool, use_responses_api: bool) -> None:
if use_max_completion_tokens: if use_max_completion_tokens:
kwargs: dict = {"max_completion_tokens": 10} kwargs: dict = {"max_completion_tokens": MAX_TOKEN_COUNT}
else: else:
kwargs = {"max_tokens": 10} kwargs = {"max_tokens": MAX_TOKEN_COUNT}
response = ChatOpenAI(model="o1", reasoning_effort="low", **kwargs).invoke( response = ChatOpenAI(
model="o1",
reasoning_effort="low",
use_responses_api=use_responses_api,
**kwargs,
).invoke(
[ [
{"role": "developer", "content": "respond in all caps"}, {"role": "developer", "content": "respond in all caps"},
{"role": "user", "content": "HOW ARE YOU"}, {"role": "user", "content": "HOW ARE YOU"},
] ]
) )
assert isinstance(response, AIMessage) assert isinstance(response, AIMessage)
assert isinstance(response.content, str) assert isinstance(response.text(), str)
assert response.content.upper() == response.content assert response.text().upper() == response.text()
@pytest.mark.scheduled @pytest.mark.scheduled
View File
@ -4,6 +4,7 @@ import json
import os import os
from typing import Any, Optional, cast from typing import Any, Optional, cast
import openai
import pytest import pytest
from langchain_core.messages import ( from langchain_core.messages import (
AIMessage, AIMessage,
@ -12,7 +13,7 @@ from langchain_core.messages import (
BaseMessageChunk, BaseMessageChunk,
) )
from pydantic import BaseModel from pydantic import BaseModel
from typing_extensions import TypedDict from typing_extensions import Annotated, TypedDict
from langchain_openai import ChatOpenAI from langchain_openai import ChatOpenAI
@ -81,6 +82,15 @@ def test_web_search() -> None:
# Manually pass in chat history # Manually pass in chat history
response = llm.invoke( response = llm.invoke(
[ [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What was a positive news story from today?",
}
],
},
first_response, first_response,
{ {
"role": "user", "role": "user",
@ -206,6 +216,31 @@ def test_parsed_dict_schema(schema: Any) -> None:
assert parsed["response"] and isinstance(parsed["response"], str) assert parsed["response"] and isinstance(parsed["response"], str)
def test_parsed_strict() -> None:
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
class InvalidJoke(TypedDict):
setup: Annotated[str, ..., "The setup of the joke"]
punchline: Annotated[str, None, "The punchline of the joke"]
# Test not strict
response = llm.invoke("Tell me a joke", response_format=InvalidJoke)
parsed = json.loads(response.text())
assert parsed == response.additional_kwargs["parsed"]
# Test strict
with pytest.raises(openai.BadRequestError):
llm.invoke(
"Tell me a joke about cats.", response_format=InvalidJoke, strict=True
)
with pytest.raises(openai.BadRequestError):
next(
llm.stream(
"Tell me a joke about cats.", response_format=InvalidJoke, strict=True
)
)
@pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])
async def test_parsed_dict_schema_async(schema: Any) -> None: async def test_parsed_dict_schema_async(schema: Any) -> None:
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
@ -245,6 +280,18 @@ def test_function_calling_and_structured_output() -> None:
assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
def test_reasoning() -> None:
llm = ChatOpenAI(model="o3-mini", use_responses_api=True)
response = llm.invoke("Hello", reasoning={"effort": "low"})
assert isinstance(response, AIMessage)
assert response.additional_kwargs["reasoning"]
llm = ChatOpenAI(model="o3-mini", reasoning_effort="low", use_responses_api=True)
response = llm.invoke("Hello")
assert isinstance(response, AIMessage)
assert response.additional_kwargs["reasoning"]
def test_stateful_api() -> None: def test_stateful_api() -> None:
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
response = llm.invoke("how are you, my name is Bobo") response = llm.invoke("how are you, my name is Bobo")
View File
@ -462,7 +462,7 @@ wheels = [
[[package]] [[package]]
name = "langchain-core" name = "langchain-core"
version = "0.3.45rc1" version = "0.3.45"
source = { editable = "../../core" } source = { editable = "../../core" }
dependencies = [ dependencies = [
{ name = "jsonpatch" }, { name = "jsonpatch" },
@ -520,7 +520,7 @@ typing = [
[[package]] [[package]]
name = "langchain-openai" name = "langchain-openai"
version = "0.3.9rc1" version = "0.3.9"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "langchain-core" }, { name = "langchain-core" },
@ -566,7 +566,7 @@ typing = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "langchain-core", editable = "../../core" }, { name = "langchain-core", editable = "../../core" },
{ name = "openai", specifier = ">=1.66.0,<2.0.0" }, { name = "openai", specifier = ">=1.66.3,<2.0.0" },
{ name = "tiktoken", specifier = ">=0.7,<1" }, { name = "tiktoken", specifier = ">=0.7,<1" },
] ]
@ -751,7 +751,7 @@ wheels = [
[[package]] [[package]]
name = "openai" name = "openai"
version = "1.66.0" version = "1.66.3"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "anyio" }, { name = "anyio" },
@ -763,9 +763,9 @@ dependencies = [
{ name = "tqdm" }, { name = "tqdm" },
{ name = "typing-extensions" }, { name = "typing-extensions" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/84/c5/3c422ca3ccc81c063955e7c20739d7f8f37fea0af865c4a60c81e6225e14/openai-1.66.0.tar.gz", hash = "sha256:8a9e672bc6eadec60a962f0b40d7d1c09050010179c919ed65322e433e2d1025", size = 396819 } sdist = { url = "https://files.pythonhosted.org/packages/a3/77/5172104ca1df35ed2ed8fb26dbc787f721c39498fc51d666c4db07756a0c/openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9", size = 397244 }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/d7/f1/d52960dac9519c9de64593460826a0fe2e19159389ec97ecf3e931d2e6a3/openai-1.66.0-py3-none-any.whl", hash = "sha256:43e4a3c0c066cc5809be4e6aac456a3ebc4ec1848226ef9d1340859ac130d45a", size = 566389 }, { url = "https://files.pythonhosted.org/packages/78/5a/e20182f7b6171642d759c548daa0ba20a1d3ac10d2bd0a13fd75704a9ac3/openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9", size = 567400 },
] ]
[[package]] [[package]]

View File
base_tests = set( base_tests = set(
[method for method in dir(comparison_class) if method.startswith("test_")] [method for method in dir(comparison_class) if method.startswith("test_")]
) )
non_standard_tests = running_tests - base_tests
assert not non_standard_tests, f"Non-standard tests found: {non_standard_tests}"
deleted_tests = base_tests - running_tests deleted_tests = base_tests - running_tests
assert not deleted_tests, f"Standard tests deleted: {deleted_tests}" assert not deleted_tests, f"Standard tests deleted: {deleted_tests}"
overridden_tests = [ overridden_tests = [
method method
for method in running_tests for method in base_tests
if getattr(self.__class__, method) is not getattr(comparison_class, method) if getattr(self.__class__, method) is not getattr(comparison_class, method)
] ]
View File
@ -36,6 +36,7 @@ from langchain_text_splitters.html import (
HTMLSemanticPreservingSplitter, HTMLSemanticPreservingSplitter,
) )
from langchain_text_splitters.json import RecursiveJsonSplitter from langchain_text_splitters.json import RecursiveJsonSplitter
from langchain_text_splitters.jsx import JSFrameworkTextSplitter
from langchain_text_splitters.konlpy import KonlpyTextSplitter from langchain_text_splitters.konlpy import KonlpyTextSplitter
from langchain_text_splitters.latex import LatexTextSplitter from langchain_text_splitters.latex import LatexTextSplitter
from langchain_text_splitters.markdown import ( from langchain_text_splitters.markdown import (
@ -60,6 +61,7 @@ __all__ = [
"RecursiveCharacterTextSplitter", "RecursiveCharacterTextSplitter",
"RecursiveJsonSplitter", "RecursiveJsonSplitter",
"LatexTextSplitter", "LatexTextSplitter",
"JSFrameworkTextSplitter",
"PythonCodeTextSplitter", "PythonCodeTextSplitter",
"KonlpyTextSplitter", "KonlpyTextSplitter",
"SpacyTextSplitter", "SpacyTextSplitter",
View File
@ -0,0 +1,98 @@
import re
from typing import Any, List, Optional
from langchain_text_splitters import RecursiveCharacterTextSplitter
class JSFrameworkTextSplitter(RecursiveCharacterTextSplitter):
"""Text splitter that handles React (JSX), Vue, and Svelte code.
This splitter extends RecursiveCharacterTextSplitter to handle
React (JSX), Vue, and Svelte code by:
1. Detecting and extracting custom component tags from the text
2. Using those tags as additional separators along with standard JS syntax
The splitter combines:
- Custom component tags as separators (e.g. <Component, <div)
    - JavaScript syntax elements (function, const, if, etc.)
- Standard text splitting on newlines
This allows chunks to break at natural boundaries in
React, Vue, and Svelte component code.
"""
def __init__(
self,
separators: Optional[List[str]] = None,
chunk_size: int = 2000,
chunk_overlap: int = 0,
**kwargs: Any,
) -> None:
"""Initialize the JS Framework text splitter.
Args:
separators: Optional list of custom separator strings to use
chunk_size: Maximum size of chunks to return
chunk_overlap: Overlap in characters between chunks
**kwargs: Additional arguments to pass to parent class
"""
super().__init__(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
self._separators = separators or []
def split_text(self, text: str) -> List[str]:
"""Split text into chunks.
This method splits the text into chunks by:
- Extracting unique opening component tags using regex
- Creating separators list with extracted tags and JS separators
- Splitting the text using the separators by calling the parent class method
Args:
text: String containing code to split
Returns:
List of text chunks split on component and JS boundaries
"""
# Extract unique opening component tags using regex
# Regex to match opening tags, excluding self-closing tags
opening_tags = re.findall(r"<\s*([a-zA-Z0-9]+)[^>]*>", text)
component_tags = []
for tag in opening_tags:
if tag not in component_tags:
component_tags.append(tag)
component_separators = [f"<{tag}" for tag in component_tags]
js_separators = [
"\nexport ",
" export ",
"\nfunction ",
"\nasync function ",
" async function ",
"\nconst ",
"\nlet ",
"\nvar ",
"\nclass ",
" class ",
"\nif ",
" if ",
"\nfor ",
" for ",
"\nwhile ",
" while ",
"\nswitch ",
" switch ",
"\ncase ",
" case ",
"\ndefault ",
" default ",
]
separators = (
self._separators
+ js_separators
+ component_separators
+ ["<>", "\n\n", "&&\n", "||\n"]
)
self._separators = separators
chunks = super().split_text(text)
return chunks
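A brief usage sketch of the new `JSFrameworkTextSplitter`, relying on the export added to `langchain_text_splitters.__init__` earlier in this diff; the component source below is made up for illustration:

from langchain_text_splitters import JSFrameworkTextSplitter

source = (
    "import React from 'react';\n"
    "function Hello() {\n"
    "  return <div><h1>Hi there</h1></div>;\n"
    "}\n"
    "export default Hello;\n"
)
splitter = JSFrameworkTextSplitter(chunk_size=60, chunk_overlap=0)
for chunk in splitter.split_text(source):
    print(repr(chunk))  # chunks break on component tags (<div, <h1) and JS keywords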
View File
@ -23,6 +23,7 @@ from langchain_text_splitters.html import (
HTMLSemanticPreservingSplitter, HTMLSemanticPreservingSplitter,
) )
from langchain_text_splitters.json import RecursiveJsonSplitter from langchain_text_splitters.json import RecursiveJsonSplitter
from langchain_text_splitters.jsx import JSFrameworkTextSplitter
from langchain_text_splitters.markdown import ( from langchain_text_splitters.markdown import (
ExperimentalMarkdownSyntaxTextSplitter, ExperimentalMarkdownSyntaxTextSplitter,
MarkdownHeaderTextSplitter, MarkdownHeaderTextSplitter,
@ -413,6 +414,144 @@ def test_python_text_splitter() -> None:
assert splits == expected_splits assert splits == expected_splits
FAKE_JSX_TEXT = """
import React from 'react';
import OtherComponent from './OtherComponent';
function MyComponent() {
const [count, setCount] = React.useState(0);
const handleClick = () => {
setCount(count + 1);
};
return (
<div>
<h1>Counter: {count}</h1>
<button onClick={handleClick}>
Increment
</button>
<OtherComponent />
</div>
);
}
export default MyComponent;
"""
def test_jsx_text_splitter() -> None:
splitter = JSFrameworkTextSplitter(chunk_size=30, chunk_overlap=0)
splits = splitter.split_text(FAKE_JSX_TEXT)
expected_splits = [
"\nimport React from 'react';\n"
"import OtherComponent from './OtherComponent';\n",
"\nfunction MyComponent() {\n const [count, setCount] = React.useState(0);",
"\n\n const handleClick = () => {\n setCount(count + 1);\n };",
"return (",
"<div>",
"<h1>Counter: {count}</h1>\n ",
"<button onClick={handleClick}>\n Increment\n </button>\n ",
"<OtherComponent />\n </div>\n );\n}\n",
"export default MyComponent;",
]
assert [s.strip() for s in splits] == [s.strip() for s in expected_splits]
FAKE_VUE_TEXT = """
<template>
<div>
<h1>{{ title }}</h1>
<button @click="increment">
Count is: {{ count }}
</button>
</div>
</template>
<script>
export default {
data() {
return {
title: 'Counter App',
count: 0
}
},
methods: {
increment() {
this.count++
}
}
}
</script>
<style>
button {
color: blue;
}
</style>
"""
def test_vue_text_splitter() -> None:
splitter = JSFrameworkTextSplitter(chunk_size=30, chunk_overlap=0)
splits = splitter.split_text(FAKE_VUE_TEXT)
expected_splits = [
"<template>",
"<div>",
"<h1>{{ title }}</h1>",
'<button @click="increment">\n Count is: {{ count }}\n'
" </button>\n </div>\n</template>",
"<script>",
"export",
" default {\n data() {\n return {\n title: 'Counter App',\n "
"count: 0\n }\n },\n methods: {\n increment() {\n "
"this.count++\n }\n }\n}\n</script>",
"<style>\nbutton {\n color: blue;\n}\n</style>",
]
assert [s.strip() for s in splits] == [s.strip() for s in expected_splits]
FAKE_SVELTE_TEXT = """
<script>
let count = 0
function increment() {
count += 1
}
</script>
<main>
<h1>Counter App</h1>
<button on:click={increment}>
Count is: {count}
</button>
</main>
<style>
button {
color: blue;
}
</style>
"""
def test_svelte_text_splitter() -> None:
splitter = JSFrameworkTextSplitter(chunk_size=30, chunk_overlap=0)
splits = splitter.split_text(FAKE_SVELTE_TEXT)
expected_splits = [
"<script>\n let count = 0",
"\n\n function increment() {\n count += 1\n }\n</script>",
"<main>",
"<h1>Counter App</h1>",
"<button on:click={increment}>\n Count is: {count}\n </button>\n</main>",
"<style>\n button {\n color: blue;\n }\n</style>",
]
assert [s.strip() for s in splits] == [s.strip() for s in expected_splits]
CHUNK_SIZE = 16 CHUNK_SIZE = 16