From 1f3054502e6a066cd0503c92f97a2ce896f8d397 Mon Sep 17 00:00:00 2001 From: Anton Masalovich Date: Mon, 21 Apr 2025 15:59:47 +0100 Subject: [PATCH] community: fix cost calculations for 4.1 and o4 in OpenAI callback (#30899) **Issue:** #30898 --- .../callbacks/openai_info.py | 140 ++++++++++++++++-- 1 file changed, 124 insertions(+), 16 deletions(-) diff --git a/libs/community/langchain_community/callbacks/openai_info.py b/libs/community/langchain_community/callbacks/openai_info.py index fbf3011e67a..5889fa1303f 100644 --- a/libs/community/langchain_community/callbacks/openai_info.py +++ b/libs/community/langchain_community/callbacks/openai_info.py @@ -10,6 +10,38 @@ from langchain_core.messages import AIMessage from langchain_core.outputs import ChatGeneration, LLMResult MODEL_COST_PER_1K_TOKENS = { + # GPT-4.1 input + "gpt-4.1": 0.002, + "gpt-4.1-2025-04-14": 0.002, + "gpt-4.1-cached": 0.0005, + "gpt-4.1-2025-04-14-cached": 0.0005, + # GPT-4.1 output + "gpt-4.1-completion": 0.008, + "gpt-4.1-2025-04-14-completion": 0.008, + # GPT-4.1-mini input + "gpt-4.1-mini": 0.0004, + "gpt-4.1-mini-2025-04-14": 0.0004, + "gpt-4.1-mini-cached": 0.0001, + "gpt-4.1-mini-2025-04-14-cached": 0.0001, + # GPT-4.1-mini output + "gpt-4.1-mini-completion": 0.0016, + "gpt-4.1-mini-2025-04-14-completion": 0.0016, + # GPT-4.1-nano input + "gpt-4.1-nano": 0.0001, + "gpt-4.1-nano-2025-04-14": 0.0001, + "gpt-4.1-nano-cached": 0.000025, + "gpt-4.1-nano-2025-04-14-cached": 0.000025, + # GPT-4.1-nano output + "gpt-4.1-nano-completion": 0.0004, + "gpt-4.1-nano-2025-04-14-completion": 0.0004, + # GPT-4.5-preview input + "gpt-4.5-preview": 0.075, + "gpt-4.5-preview-2025-02-27": 0.075, + "gpt-4.5-preview-cached": 0.0375, + "gpt-4.5-preview-2025-02-27-cached": 0.0375, + # GPT-4.5-preview output + "gpt-4.5-preview-completion": 0.15, + "gpt-4.5-preview-2025-02-27-completion": 0.15, # OpenAI o1 input "o1": 0.015, "o1-2024-12-17": 0.015, @@ -18,6 +50,28 @@ MODEL_COST_PER_1K_TOKENS = { # OpenAI o1 output "o1-completion": 0.06, "o1-2024-12-17-completion": 0.06, + # OpenAI o1-pro input + "o1-pro": 0.15, + "o1-pro-2025-03-19": 0.15, + # OpenAI o1-pro output + "o1-pro-completion": 0.6, + "o1-pro-2025-03-19-completion": 0.6, + # OpenAI o3 input + "o3": 0.01, + "o3-2025-04-16": 0.01, + "o3-cached": 0.0025, + "o3-2025-04-16-cached": 0.0025, + # OpenAI o3 output + "o3-completion": 0.04, + "o3-2025-04-16-completion": 0.04, + # OpenAI o4-mini input + "o4-mini": 0.0011, + "o4-mini-2025-04-16": 0.0011, + "o4-mini-cached": 0.000275, + "o4-mini-2025-04-16-cached": 0.000275, + # OpenAI o4-mini output + "o4-mini-completion": 0.0044, + "o4-mini-2025-04-16-completion": 0.0044, # OpenAI o3-mini input "o3-mini": 0.0011, "o3-mini-2025-01-31": 0.0011, @@ -26,6 +80,14 @@ MODEL_COST_PER_1K_TOKENS = { # OpenAI o3-mini output "o3-mini-completion": 0.0044, "o3-mini-2025-01-31-completion": 0.0044, + # OpenAI o1-mini input (updated pricing) + "o1-mini": 0.0011, + "o1-mini-cached": 0.00055, + "o1-mini-2024-09-12": 0.0011, + "o1-mini-2024-09-12-cached": 0.00055, + # OpenAI o1-mini output (updated pricing) + "o1-mini-completion": 0.0044, + "o1-mini-2024-09-12-completion": 0.0044, # OpenAI o1-preview input "o1-preview": 0.015, "o1-preview-cached": 0.0075, @@ -34,22 +96,6 @@ MODEL_COST_PER_1K_TOKENS = { # OpenAI o1-preview output "o1-preview-completion": 0.06, "o1-preview-2024-09-12-completion": 0.06, - # OpenAI o1-mini input - "o1-mini": 0.003, - "o1-mini-cached": 0.0015, - "o1-mini-2024-09-12": 0.003, - "o1-mini-2024-09-12-cached": 0.0015, - # OpenAI o1-mini output - "o1-mini-completion": 0.012, - "o1-mini-2024-09-12-completion": 0.012, - # GPT-4o-mini input - "gpt-4o-mini": 0.00015, - "gpt-4o-mini-cached": 0.000075, - "gpt-4o-mini-2024-07-18": 0.00015, - "gpt-4o-mini-2024-07-18-cached": 0.000075, - # GPT-4o-mini output - "gpt-4o-mini-completion": 0.0006, - "gpt-4o-mini-2024-07-18-completion": 0.0006, # GPT-4o input "gpt-4o": 0.0025, "gpt-4o-cached": 0.00125, @@ -63,6 +109,65 @@ MODEL_COST_PER_1K_TOKENS = { "gpt-4o-2024-05-13-completion": 0.015, "gpt-4o-2024-08-06-completion": 0.01, "gpt-4o-2024-11-20-completion": 0.01, + # GPT-4o-audio-preview input + "gpt-4o-audio-preview": 0.0025, + "gpt-4o-audio-preview-2024-12-17": 0.0025, + "gpt-4o-audio-preview-2024-10-01": 0.0025, + # GPT-4o-audio-preview output + "gpt-4o-audio-preview-completion": 0.01, + "gpt-4o-audio-preview-2024-12-17-completion": 0.01, + "gpt-4o-audio-preview-2024-10-01-completion": 0.01, + # GPT-4o-realtime-preview input + "gpt-4o-realtime-preview": 0.005, + "gpt-4o-realtime-preview-2024-12-17": 0.005, + "gpt-4o-realtime-preview-2024-10-01": 0.005, + "gpt-4o-realtime-preview-cached": 0.0025, + "gpt-4o-realtime-preview-2024-12-17-cached": 0.0025, + "gpt-4o-realtime-preview-2024-10-01-cached": 0.0025, + # GPT-4o-realtime-preview output + "gpt-4o-realtime-preview-completion": 0.02, + "gpt-4o-realtime-preview-2024-12-17-completion": 0.02, + "gpt-4o-realtime-preview-2024-10-01-completion": 0.02, + # GPT-4o-mini input + "gpt-4o-mini": 0.00015, + "gpt-4o-mini-cached": 0.000075, + "gpt-4o-mini-2024-07-18": 0.00015, + "gpt-4o-mini-2024-07-18-cached": 0.000075, + # GPT-4o-mini output + "gpt-4o-mini-completion": 0.0006, + "gpt-4o-mini-2024-07-18-completion": 0.0006, + # GPT-4o-mini-audio-preview input + "gpt-4o-mini-audio-preview": 0.00015, + "gpt-4o-mini-audio-preview-2024-12-17": 0.00015, + # GPT-4o-mini-audio-preview output + "gpt-4o-mini-audio-preview-completion": 0.0006, + "gpt-4o-mini-audio-preview-2024-12-17-completion": 0.0006, + # GPT-4o-mini-realtime-preview input + "gpt-4o-mini-realtime-preview": 0.0006, + "gpt-4o-mini-realtime-preview-2024-12-17": 0.0006, + "gpt-4o-mini-realtime-preview-cached": 0.0003, + "gpt-4o-mini-realtime-preview-2024-12-17-cached": 0.0003, + # GPT-4o-mini-realtime-preview output + "gpt-4o-mini-realtime-preview-completion": 0.0024, + "gpt-4o-mini-realtime-preview-2024-12-17-completion": 0.0024, + # GPT-4o-mini-search-preview input + "gpt-4o-mini-search-preview": 0.00015, + "gpt-4o-mini-search-preview-2025-03-11": 0.00015, + # GPT-4o-mini-search-preview output + "gpt-4o-mini-search-preview-completion": 0.0006, + "gpt-4o-mini-search-preview-2025-03-11-completion": 0.0006, + # GPT-4o-search-preview input + "gpt-4o-search-preview": 0.0025, + "gpt-4o-search-preview-2025-03-11": 0.0025, + # GPT-4o-search-preview output + "gpt-4o-search-preview-completion": 0.01, + "gpt-4o-search-preview-2025-03-11-completion": 0.01, + # Computer-use-preview input + "computer-use-preview": 0.003, + "computer-use-preview-2025-03-11": 0.003, + # Computer-use-preview output + "computer-use-preview-completion": 0.012, + "computer-use-preview-2025-03-11-completion": 0.012, # GPT-4 input "gpt-4": 0.03, "gpt-4-0314": 0.03, @@ -219,6 +324,7 @@ def standardize_model_name( or model_name.startswith("gpt-35") or model_name.startswith("o1-") or model_name.startswith("o3-") + or model_name.startswith("o4-") or ("finetuned" in model_name and "legacy" not in model_name) ): return model_name + "-completion" @@ -226,8 +332,10 @@ def standardize_model_name( token_type == TokenType.PROMPT_CACHED and ( model_name.startswith("gpt-4o") + or model_name.startswith("gpt-4.1") or model_name.startswith("o1") or model_name.startswith("o3") + or model_name.startswith("o4") ) and not (model_name.startswith("gpt-4o-2024-05-13")) ):