From 23f701b08e0e1025cac06f70e44fd51c2abde7ce Mon Sep 17 00:00:00 2001 From: Aubrey Ford Date: Mon, 21 Apr 2025 05:39:07 -0700 Subject: [PATCH 1/8] langchain_community: OpenAIEmbeddings not respecting chunk_size argument (#30946) This is a follow-on PR to go with the identical changes that were made in parters/openai. Previous PR: https://github.com/langchain-ai/langchain/pull/30757 When calling embed_documents and providing a chunk_size argument, that argument is ignored when OpenAIEmbeddings is instantiated with its default configuration (where check_embedding_ctx_length=True). _get_len_safe_embeddings specifies a chunk_size parameter but it's not being passed through in embed_documents, which is its only caller. This appears to be an oversight, especially given that the _get_len_safe_embeddings docstring states it should respect "the set embedding context length and chunk size." Developers typically expect method parameters to take effect (also, take precedence) when explicitly provided, especially when instantiating using defaults. I was confused as to why my API calls were being rejected regardless of the chunk size I provided. --- .../langchain_community/embeddings/openai.py | 8 +++++-- .../unit_tests/embeddings/test_openai.py | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/libs/community/langchain_community/embeddings/openai.py b/libs/community/langchain_community/embeddings/openai.py index 126fe564ae5..5bedfc747ee 100644 --- a/libs/community/langchain_community/embeddings/openai.py +++ b/libs/community/langchain_community/embeddings/openai.py @@ -668,7 +668,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings): # NOTE: to keep things simple, we assume the list may contain texts longer # than the maximum context and use length-safe embedding function. engine = cast(str, self.deployment) - return self._get_len_safe_embeddings(texts, engine=engine) + return self._get_len_safe_embeddings( + texts, engine=engine, chunk_size=chunk_size + ) async def aembed_documents( self, texts: List[str], chunk_size: Optional[int] = 0 @@ -686,7 +688,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings): # NOTE: to keep things simple, we assume the list may contain texts longer # than the maximum context and use length-safe embedding function. engine = cast(str, self.deployment) - return await self._aget_len_safe_embeddings(texts, engine=engine) + return self._get_len_safe_embeddings( + texts, engine=engine, chunk_size=chunk_size + ) def embed_query(self, text: str) -> List[float]: """Call out to OpenAI's embedding endpoint for embedding query text. diff --git a/libs/community/tests/unit_tests/embeddings/test_openai.py b/libs/community/tests/unit_tests/embeddings/test_openai.py index e62bee5f945..8d349e67387 100644 --- a/libs/community/tests/unit_tests/embeddings/test_openai.py +++ b/libs/community/tests/unit_tests/embeddings/test_openai.py @@ -1,7 +1,12 @@ +import os +from unittest.mock import patch + import pytest from langchain_community.embeddings.openai import OpenAIEmbeddings +os.environ["OPENAI_API_KEY"] = "foo" + @pytest.mark.requires("openai") def test_openai_invalid_model_kwargs() -> None: @@ -14,3 +19,20 @@ def test_openai_incorrect_field() -> None: with pytest.warns(match="not default parameter"): llm = OpenAIEmbeddings(foo="bar", openai_api_key="foo") # type: ignore[call-arg] assert llm.model_kwargs == {"foo": "bar"} + + +@pytest.mark.requires("openai") +def test_embed_documents_with_custom_chunk_size() -> None: + embeddings = OpenAIEmbeddings(chunk_size=2) + texts = ["text1", "text2", "text3", "text4"] + custom_chunk_size = 3 + + with patch.object(embeddings.client, "create") as mock_create: + mock_create.side_effect = [ + {"data": [{"embedding": [0.1, 0.2]}, {"embedding": [0.3, 0.4]}]}, + {"data": [{"embedding": [0.5, 0.6]}, {"embedding": [0.7, 0.8]}]}, + ] + + embeddings.embed_documents(texts, chunk_size=custom_chunk_size) + mock_create.assert_any_call(input=[[1342, 19]], **embeddings._invocation_params) + mock_create.assert_any_call(input=[[1342, 19]], **embeddings._invocation_params) From 9418c0d8a50e093b08d05b03d41ed2d0c9464ebf Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Mon, 21 Apr 2025 21:43:29 +0900 Subject: [PATCH 2/8] docs: update tableau.ipynb (#30938) Initalize -> Initialize --- docs/docs/integrations/tools/tableau.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/integrations/tools/tableau.ipynb b/docs/docs/integrations/tools/tableau.ipynb index a9c59951ff1..33584fe555f 100644 --- a/docs/docs/integrations/tools/tableau.ipynb +++ b/docs/docs/integrations/tools/tableau.ipynb @@ -183,7 +183,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Initalize simple_datasource_qa for querying Tableau Datasources through VDS\n", + "# Initialize simple_datasource_qa for querying Tableau Datasources through VDS\n", "analyze_datasource = initialize_simple_datasource_qa(\n", " domain=tableau_server,\n", " site=tableau_site,\n", From 335f089d6a987bbf3ce675506e66629e73a7c69e Mon Sep 17 00:00:00 2001 From: xsai9101 Date: Mon, 21 Apr 2025 05:47:33 -0700 Subject: [PATCH 3/8] Community: Add bind variable support for oracle adb docloader (#30937) PR title: Community: Add bind variable support for oracle adb docloader Description: This PR adds support of using bind variable to oracle adb doc loader class, including minor document change. Issue: N/A Dependencies: No new dependencies. --- .../document_loaders/oracleadb_loader.ipynb | 22 ++++++++----------- .../document_loaders/oracleadb_loader.py | 12 ++++++++-- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/docs/docs/integrations/document_loaders/oracleadb_loader.ipynb b/docs/docs/integrations/document_loaders/oracleadb_loader.ipynb index 7988b1cdd71..b1f6577254a 100644 --- a/docs/docs/integrations/document_loaders/oracleadb_loader.ipynb +++ b/docs/docs/integrations/document_loaders/oracleadb_loader.ipynb @@ -36,10 +36,7 @@ "pip install oracledb" ], "metadata": { - "collapsed": false, - "pycharm": { - "is_executing": true - } + "collapsed": false } }, { @@ -51,10 +48,7 @@ "from settings import s" ], "metadata": { - "collapsed": false, - "pycharm": { - "is_executing": true - } + "collapsed": false } }, { @@ -97,16 +91,14 @@ "doc_2 = doc_loader_2.load()" ], "metadata": { - "collapsed": false, - "pycharm": { - "is_executing": true - } + "collapsed": false } }, { "cell_type": "markdown", "source": [ - "With TLS authentication, wallet_location and wallet_password are not required." + "With TLS authentication, wallet_location and wallet_password are not required.\n", + "Bind variable option is provided by argument \"parameters\"." ], "metadata": { "collapsed": false @@ -117,6 +109,8 @@ "execution_count": null, "outputs": [], "source": [ + "SQL_QUERY = \"select channel_id, channel_desc from sh.channels where channel_desc = :1 fetch first 5 rows only\"\n", + "\n", "doc_loader_3 = OracleAutonomousDatabaseLoader(\n", " query=SQL_QUERY,\n", " user=s.USERNAME,\n", @@ -124,6 +118,7 @@ " schema=s.SCHEMA,\n", " config_dir=s.CONFIG_DIR,\n", " tns_name=s.TNS_NAME,\n", + " parameters=[\"Direct Sales\"],\n", ")\n", "doc_3 = doc_loader_3.load()\n", "\n", @@ -133,6 +128,7 @@ " password=s.PASSWORD,\n", " schema=s.SCHEMA,\n", " connection_string=s.CONNECTION_STRING,\n", + " parameters=[\"Direct Sales\"],\n", ")\n", "doc_4 = doc_loader_4.load()" ], diff --git a/libs/community/langchain_community/document_loaders/oracleadb_loader.py b/libs/community/langchain_community/document_loaders/oracleadb_loader.py index ebf0c446c44..f9a60b013fa 100644 --- a/libs/community/langchain_community/document_loaders/oracleadb_loader.py +++ b/libs/community/langchain_community/document_loaders/oracleadb_loader.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from langchain_core.documents import Document @@ -31,6 +31,7 @@ class OracleAutonomousDatabaseLoader(BaseLoader): wallet_password: Optional[str] = None, connection_string: Optional[str] = None, metadata: Optional[List[str]] = None, + parameters: Optional[Union[list, tuple, dict]] = None, ): """ init method @@ -44,6 +45,7 @@ class OracleAutonomousDatabaseLoader(BaseLoader): :param wallet_password: password of wallet :param connection_string: connection string to connect to adb instance :param metadata: metadata used in document + :param parameters: bind variable to use in query """ # Mandatory required arguments. self.query = query @@ -67,6 +69,9 @@ class OracleAutonomousDatabaseLoader(BaseLoader): # metadata column self.metadata = metadata + # parameters, e.g bind variable + self.parameters = parameters + # dsn self.dsn: Optional[str] self._set_dsn() @@ -96,7 +101,10 @@ class OracleAutonomousDatabaseLoader(BaseLoader): cursor = connection.cursor() if self.schema: cursor.execute(f"alter session set current_schema={self.schema}") - cursor.execute(self.query) + if self.parameters: + cursor.execute(self.query, self.parameters) + else: + cursor.execute(self.query) columns = [col[0] for col in cursor.description] data = cursor.fetchall() data = [ From de56c31672fa5151043e76beca3f226ae84969b0 Mon Sep 17 00:00:00 2001 From: Ahmed Tammaa Date: Mon, 21 Apr 2025 16:06:18 +0200 Subject: [PATCH 4/8] core: Improve OutputParser error messaging when model output is truncated (max_tokens) (#30936) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses #30158 When using the output parser—either in a chain or standalone—hitting max_tokens triggers a misleading “missing variable” error instead of indicating the output was truncated. This subtle bug often surfaces with Anthropic models. --------- Co-authored-by: Chester Curme --- .../output_parsers/openai_tools.py | 19 +++++++++++++++ .../output_parsers/test_openai_tools.py | 23 ++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/libs/core/langchain_core/output_parsers/openai_tools.py b/libs/core/langchain_core/output_parsers/openai_tools.py index 254d11eca1f..b4f845bdcad 100644 --- a/libs/core/langchain_core/output_parsers/openai_tools.py +++ b/libs/core/langchain_core/output_parsers/openai_tools.py @@ -2,6 +2,7 @@ import copy import json +import logging from json import JSONDecodeError from typing import Annotated, Any, Optional @@ -16,6 +17,8 @@ from langchain_core.outputs import ChatGeneration, Generation from langchain_core.utils.json import parse_partial_json from langchain_core.utils.pydantic import TypeBaseModel +logger = logging.getLogger(__name__) + def parse_tool_call( raw_tool_call: dict[str, Any], @@ -250,6 +253,14 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser): return parsed_result +# Common cause of ValidationError is truncated output due to max_tokens. +_MAX_TOKENS_ERROR = ( + "Output parser received a `max_tokens` stop reason. " + "The output is likely incomplete—please increase `max_tokens` " + "or shorten your prompt." +) + + class PydanticToolsParser(JsonOutputToolsParser): """Parse tools from OpenAI response.""" @@ -296,6 +307,14 @@ class PydanticToolsParser(JsonOutputToolsParser): except (ValidationError, ValueError): if partial: continue + has_max_tokens_stop_reason = any( + generation.message.response_metadata.get("stop_reason") + == "max_tokens" + for generation in result + if isinstance(generation, ChatGeneration) + ) + if has_max_tokens_stop_reason: + logger.exception(_MAX_TOKENS_ERROR) raise if self.first_tool_only: return pydantic_objects[0] if pydantic_objects else None diff --git a/libs/core/tests/unit_tests/output_parsers/test_openai_tools.py b/libs/core/tests/unit_tests/output_parsers/test_openai_tools.py index c63dd693d74..992e5c48c55 100644 --- a/libs/core/tests/unit_tests/output_parsers/test_openai_tools.py +++ b/libs/core/tests/unit_tests/output_parsers/test_openai_tools.py @@ -2,7 +2,7 @@ from collections.abc import AsyncIterator, Iterator from typing import Any import pytest -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ValidationError from langchain_core.messages import ( AIMessage, @@ -635,3 +635,24 @@ def test_parse_with_different_pydantic_1_proper() -> None: forecast="Sunny", ) ] + + +def test_max_tokens_error(caplog: Any) -> None: + parser = PydanticToolsParser(tools=[NameCollector], first_tool_only=True) + input = AIMessage( + content="", + tool_calls=[ + { + "id": "call_OwL7f5PE", + "name": "NameCollector", + "args": {"names": ["suz", "jerm"]}, + } + ], + response_metadata={"stop_reason": "max_tokens"}, + ) + with pytest.raises(ValidationError): + _ = parser.invoke(input) + assert any( + "`max_tokens` stop reason" in msg and record.levelname == "ERROR" + for record, msg in zip(caplog.records, caplog.messages) + ) From 0e9d0dbc10296a84f29f5f6ef6b7f0c5a3b4dff3 Mon Sep 17 00:00:00 2001 From: Pushpa Kumar Date: Mon, 21 Apr 2025 09:10:14 -0500 Subject: [PATCH 5/8] docs: dynamic copyright year in API ref (#30944) - [x] **PR title:** `docs: make footer year dynamic in API reference docs` - [x] **PR message:** - **Description:** Update `docs/api_reference/conf.py` to make the copyright year dynamic (on [https://python.langchain.com/api_reference/](https://python.langchain.com/api_reference/)). --------- Co-authored-by: Chester Curme --- docs/api_reference/conf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/api_reference/conf.py b/docs/api_reference/conf.py index 514a17ad9b0..22d59f0badf 100644 --- a/docs/api_reference/conf.py +++ b/docs/api_reference/conf.py @@ -11,6 +11,7 @@ import json import os import sys +from datetime import datetime from pathlib import Path import toml @@ -104,7 +105,7 @@ def skip_private_members(app, what, name, obj, skip, options): # -- Project information ----------------------------------------------------- project = "🦜🔗 LangChain" -copyright = "2023, LangChain Inc" +copyright = f"{datetime.now().year}, LangChain Inc" author = "LangChain, Inc" html_favicon = "_static/img/brand/favicon.png" From 27296bdb0cbfc96f7d49ef048e662c239da20aa9 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Mon, 21 Apr 2025 07:18:36 -0700 Subject: [PATCH 6/8] core: Make Graph.Node.data optional (#30943) Thank you for contributing to LangChain! - [ ] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, etc. is being modified. Use "docs: ..." for purely docs changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [ ] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! - [ ] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, eyurtsev, ccurme, vbarda, hwchase17. --- libs/core/langchain_core/runnables/graph.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/libs/core/langchain_core/runnables/graph.py b/libs/core/langchain_core/runnables/graph.py index eeac8fc51ac..323dcdd2d48 100644 --- a/libs/core/langchain_core/runnables/graph.py +++ b/libs/core/langchain_core/runnables/graph.py @@ -108,7 +108,7 @@ class Node(NamedTuple): id: str name: str - data: Union[type[BaseModel], RunnableType] + data: Union[type[BaseModel], RunnableType, None] metadata: Optional[dict[str, Any]] def copy(self, *, id: Optional[str] = None, name: Optional[str] = None) -> Node: @@ -181,7 +181,7 @@ class MermaidDrawMethod(Enum): API = "api" # Uses Mermaid.INK API to render the graph -def node_data_str(id: str, data: Union[type[BaseModel], RunnableType]) -> str: +def node_data_str(id: str, data: Union[type[BaseModel], RunnableType, None]) -> str: """Convert the data of a node to a string. Args: @@ -193,7 +193,7 @@ def node_data_str(id: str, data: Union[type[BaseModel], RunnableType]) -> str: """ from langchain_core.runnables.base import Runnable - if not is_uuid(id): + if not is_uuid(id) or data is None: return id data_str = data.get_name() if isinstance(data, Runnable) else data.__name__ return data_str if not data_str.startswith("Runnable") else data_str[8:] @@ -215,8 +215,10 @@ def node_data_json( from langchain_core.load.serializable import to_json_not_implemented from langchain_core.runnables.base import Runnable, RunnableSerializable - if isinstance(node.data, RunnableSerializable): - json: dict[str, Any] = { + if node.data is None: + json: dict[str, Any] = {} + elif isinstance(node.data, RunnableSerializable): + json = { "type": "runnable", "data": { "id": node.data.lc_id(), @@ -317,7 +319,7 @@ class Graph: def add_node( self, - data: Union[type[BaseModel], RunnableType], + data: Union[type[BaseModel], RunnableType, None], id: Optional[str] = None, *, metadata: Optional[dict[str, Any]] = None, From 589bc1989050f01e5067ad15b24c904ec59da211 Mon Sep 17 00:00:00 2001 From: Ahmed Tammaa Date: Mon, 21 Apr 2025 16:44:39 +0200 Subject: [PATCH 7/8] anthropic[patch]: make description optional on AnthropicTool (#30935) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR Summary This change adds a fallback in ChatAnthropic.with_structured_output() to handle Pydantic models that don’t include a docstring. Without it, calling: ```py from pydantic import BaseModel from langchain_anthropic import ChatAnthropic class SampleModel(BaseModel): sample_field: str llm = ChatAnthropic( model="claude-3-7-sonnet-latest" ).with_structured_output(SampleModel.model_json_schema()) llm.invoke("test") ``` will raise a ``` KeyError: 'description' ``` because Pydantic omits the description field when no docstring is present. This issue doesn’t occur when using ChatOpenAI or if you add a docstring to the model: ```py from pydantic import BaseModel from langchain_openai import ChatOpenAI class SampleModel(BaseModel): """Schema for sample_field output.""" sample_field: str llm = ChatOpenAI( model="gpt-4o-mini" ).with_structured_output(SampleModel.model_json_schema()) llm.invoke("test") ``` --------- Co-authored-by: Chester Curme --- .../anthropic/langchain_anthropic/chat_models.py | 5 +++-- .../anthropic/tests/unit_tests/test_chat_models.py | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index 5ca29c20bce..f872fe32c18 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -82,8 +82,8 @@ class AnthropicTool(TypedDict): """Anthropic tool definition.""" name: str - description: str input_schema: dict[str, Any] + description: NotRequired[str] cache_control: NotRequired[dict[str, str]] @@ -1675,9 +1675,10 @@ def convert_to_anthropic_tool( oai_formatted = convert_to_openai_tool(tool)["function"] anthropic_formatted = AnthropicTool( name=oai_formatted["name"], - description=oai_formatted["description"], input_schema=oai_formatted["parameters"], ) + if "description" in oai_formatted: + anthropic_formatted["description"] = oai_formatted["description"] return anthropic_formatted diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py index f9225296096..008b2be1cb6 100644 --- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py @@ -931,3 +931,12 @@ def test_anthropic_bind_tools_tool_choice() -> None: assert cast(RunnableBinding, chat_model_with_tools).kwargs["tool_choice"] == { "type": "any" } + + +def test_optional_description() -> None: + llm = ChatAnthropic(model="claude-3-5-haiku-latest") + + class SampleModel(BaseModel): + sample_field: str + + _ = llm.with_structured_output(SampleModel.model_json_schema()) From 1f3054502e6a066cd0503c92f97a2ce896f8d397 Mon Sep 17 00:00:00 2001 From: Anton Masalovich Date: Mon, 21 Apr 2025 15:59:47 +0100 Subject: [PATCH 8/8] community: fix cost calculations for 4.1 and o4 in OpenAI callback (#30899) **Issue:** #30898 --- .../callbacks/openai_info.py | 140 ++++++++++++++++-- 1 file changed, 124 insertions(+), 16 deletions(-) diff --git a/libs/community/langchain_community/callbacks/openai_info.py b/libs/community/langchain_community/callbacks/openai_info.py index fbf3011e67a..5889fa1303f 100644 --- a/libs/community/langchain_community/callbacks/openai_info.py +++ b/libs/community/langchain_community/callbacks/openai_info.py @@ -10,6 +10,38 @@ from langchain_core.messages import AIMessage from langchain_core.outputs import ChatGeneration, LLMResult MODEL_COST_PER_1K_TOKENS = { + # GPT-4.1 input + "gpt-4.1": 0.002, + "gpt-4.1-2025-04-14": 0.002, + "gpt-4.1-cached": 0.0005, + "gpt-4.1-2025-04-14-cached": 0.0005, + # GPT-4.1 output + "gpt-4.1-completion": 0.008, + "gpt-4.1-2025-04-14-completion": 0.008, + # GPT-4.1-mini input + "gpt-4.1-mini": 0.0004, + "gpt-4.1-mini-2025-04-14": 0.0004, + "gpt-4.1-mini-cached": 0.0001, + "gpt-4.1-mini-2025-04-14-cached": 0.0001, + # GPT-4.1-mini output + "gpt-4.1-mini-completion": 0.0016, + "gpt-4.1-mini-2025-04-14-completion": 0.0016, + # GPT-4.1-nano input + "gpt-4.1-nano": 0.0001, + "gpt-4.1-nano-2025-04-14": 0.0001, + "gpt-4.1-nano-cached": 0.000025, + "gpt-4.1-nano-2025-04-14-cached": 0.000025, + # GPT-4.1-nano output + "gpt-4.1-nano-completion": 0.0004, + "gpt-4.1-nano-2025-04-14-completion": 0.0004, + # GPT-4.5-preview input + "gpt-4.5-preview": 0.075, + "gpt-4.5-preview-2025-02-27": 0.075, + "gpt-4.5-preview-cached": 0.0375, + "gpt-4.5-preview-2025-02-27-cached": 0.0375, + # GPT-4.5-preview output + "gpt-4.5-preview-completion": 0.15, + "gpt-4.5-preview-2025-02-27-completion": 0.15, # OpenAI o1 input "o1": 0.015, "o1-2024-12-17": 0.015, @@ -18,6 +50,28 @@ MODEL_COST_PER_1K_TOKENS = { # OpenAI o1 output "o1-completion": 0.06, "o1-2024-12-17-completion": 0.06, + # OpenAI o1-pro input + "o1-pro": 0.15, + "o1-pro-2025-03-19": 0.15, + # OpenAI o1-pro output + "o1-pro-completion": 0.6, + "o1-pro-2025-03-19-completion": 0.6, + # OpenAI o3 input + "o3": 0.01, + "o3-2025-04-16": 0.01, + "o3-cached": 0.0025, + "o3-2025-04-16-cached": 0.0025, + # OpenAI o3 output + "o3-completion": 0.04, + "o3-2025-04-16-completion": 0.04, + # OpenAI o4-mini input + "o4-mini": 0.0011, + "o4-mini-2025-04-16": 0.0011, + "o4-mini-cached": 0.000275, + "o4-mini-2025-04-16-cached": 0.000275, + # OpenAI o4-mini output + "o4-mini-completion": 0.0044, + "o4-mini-2025-04-16-completion": 0.0044, # OpenAI o3-mini input "o3-mini": 0.0011, "o3-mini-2025-01-31": 0.0011, @@ -26,6 +80,14 @@ MODEL_COST_PER_1K_TOKENS = { # OpenAI o3-mini output "o3-mini-completion": 0.0044, "o3-mini-2025-01-31-completion": 0.0044, + # OpenAI o1-mini input (updated pricing) + "o1-mini": 0.0011, + "o1-mini-cached": 0.00055, + "o1-mini-2024-09-12": 0.0011, + "o1-mini-2024-09-12-cached": 0.00055, + # OpenAI o1-mini output (updated pricing) + "o1-mini-completion": 0.0044, + "o1-mini-2024-09-12-completion": 0.0044, # OpenAI o1-preview input "o1-preview": 0.015, "o1-preview-cached": 0.0075, @@ -34,22 +96,6 @@ MODEL_COST_PER_1K_TOKENS = { # OpenAI o1-preview output "o1-preview-completion": 0.06, "o1-preview-2024-09-12-completion": 0.06, - # OpenAI o1-mini input - "o1-mini": 0.003, - "o1-mini-cached": 0.0015, - "o1-mini-2024-09-12": 0.003, - "o1-mini-2024-09-12-cached": 0.0015, - # OpenAI o1-mini output - "o1-mini-completion": 0.012, - "o1-mini-2024-09-12-completion": 0.012, - # GPT-4o-mini input - "gpt-4o-mini": 0.00015, - "gpt-4o-mini-cached": 0.000075, - "gpt-4o-mini-2024-07-18": 0.00015, - "gpt-4o-mini-2024-07-18-cached": 0.000075, - # GPT-4o-mini output - "gpt-4o-mini-completion": 0.0006, - "gpt-4o-mini-2024-07-18-completion": 0.0006, # GPT-4o input "gpt-4o": 0.0025, "gpt-4o-cached": 0.00125, @@ -63,6 +109,65 @@ MODEL_COST_PER_1K_TOKENS = { "gpt-4o-2024-05-13-completion": 0.015, "gpt-4o-2024-08-06-completion": 0.01, "gpt-4o-2024-11-20-completion": 0.01, + # GPT-4o-audio-preview input + "gpt-4o-audio-preview": 0.0025, + "gpt-4o-audio-preview-2024-12-17": 0.0025, + "gpt-4o-audio-preview-2024-10-01": 0.0025, + # GPT-4o-audio-preview output + "gpt-4o-audio-preview-completion": 0.01, + "gpt-4o-audio-preview-2024-12-17-completion": 0.01, + "gpt-4o-audio-preview-2024-10-01-completion": 0.01, + # GPT-4o-realtime-preview input + "gpt-4o-realtime-preview": 0.005, + "gpt-4o-realtime-preview-2024-12-17": 0.005, + "gpt-4o-realtime-preview-2024-10-01": 0.005, + "gpt-4o-realtime-preview-cached": 0.0025, + "gpt-4o-realtime-preview-2024-12-17-cached": 0.0025, + "gpt-4o-realtime-preview-2024-10-01-cached": 0.0025, + # GPT-4o-realtime-preview output + "gpt-4o-realtime-preview-completion": 0.02, + "gpt-4o-realtime-preview-2024-12-17-completion": 0.02, + "gpt-4o-realtime-preview-2024-10-01-completion": 0.02, + # GPT-4o-mini input + "gpt-4o-mini": 0.00015, + "gpt-4o-mini-cached": 0.000075, + "gpt-4o-mini-2024-07-18": 0.00015, + "gpt-4o-mini-2024-07-18-cached": 0.000075, + # GPT-4o-mini output + "gpt-4o-mini-completion": 0.0006, + "gpt-4o-mini-2024-07-18-completion": 0.0006, + # GPT-4o-mini-audio-preview input + "gpt-4o-mini-audio-preview": 0.00015, + "gpt-4o-mini-audio-preview-2024-12-17": 0.00015, + # GPT-4o-mini-audio-preview output + "gpt-4o-mini-audio-preview-completion": 0.0006, + "gpt-4o-mini-audio-preview-2024-12-17-completion": 0.0006, + # GPT-4o-mini-realtime-preview input + "gpt-4o-mini-realtime-preview": 0.0006, + "gpt-4o-mini-realtime-preview-2024-12-17": 0.0006, + "gpt-4o-mini-realtime-preview-cached": 0.0003, + "gpt-4o-mini-realtime-preview-2024-12-17-cached": 0.0003, + # GPT-4o-mini-realtime-preview output + "gpt-4o-mini-realtime-preview-completion": 0.0024, + "gpt-4o-mini-realtime-preview-2024-12-17-completion": 0.0024, + # GPT-4o-mini-search-preview input + "gpt-4o-mini-search-preview": 0.00015, + "gpt-4o-mini-search-preview-2025-03-11": 0.00015, + # GPT-4o-mini-search-preview output + "gpt-4o-mini-search-preview-completion": 0.0006, + "gpt-4o-mini-search-preview-2025-03-11-completion": 0.0006, + # GPT-4o-search-preview input + "gpt-4o-search-preview": 0.0025, + "gpt-4o-search-preview-2025-03-11": 0.0025, + # GPT-4o-search-preview output + "gpt-4o-search-preview-completion": 0.01, + "gpt-4o-search-preview-2025-03-11-completion": 0.01, + # Computer-use-preview input + "computer-use-preview": 0.003, + "computer-use-preview-2025-03-11": 0.003, + # Computer-use-preview output + "computer-use-preview-completion": 0.012, + "computer-use-preview-2025-03-11-completion": 0.012, # GPT-4 input "gpt-4": 0.03, "gpt-4-0314": 0.03, @@ -219,6 +324,7 @@ def standardize_model_name( or model_name.startswith("gpt-35") or model_name.startswith("o1-") or model_name.startswith("o3-") + or model_name.startswith("o4-") or ("finetuned" in model_name and "legacy" not in model_name) ): return model_name + "-completion" @@ -226,8 +332,10 @@ def standardize_model_name( token_type == TokenType.PROMPT_CACHED and ( model_name.startswith("gpt-4o") + or model_name.startswith("gpt-4.1") or model_name.startswith("o1") or model_name.startswith("o3") + or model_name.startswith("o4") ) and not (model_name.startswith("gpt-4o-2024-05-13")) ):