Feature/fix azureopenai model mappings (#8621)

This pull request aims to ensure that the `OpenAICallbackHandler` can
properly calculate the total cost for Azure OpenAI chat models. The
following changes have resolved this issue:

- The `model_name` has been added to the ChatResult llm_output. Without
this, the default values of `gpt-35-turbo` were applied. This was
causing the total cost for Azure OpenAI's GPT-4 to be significantly
inaccurate.
- A new parameter `model_version` has been added to `AzureChatOpenAI`.
Azure does not include the model version in the response. With the
addition of `model_name`, this is not a significant issue for GPT-4
models, but it's an issue for GPT-3.5-Turbo. Version 0301 (default) of
GPT-3.5-Turbo on Azure has a flat rate of 0.002 per 1k tokens for both
prompt and completion. However, version 0613 introduced a split in
pricing for prompt and completion tokens.
- The `OpenAICallbackHandler` implementation has been updated with the
proper model names, versions, and cost per 1k tokens.

Unit tests have been added to ensure the functionality works as
expected; the Azure ChatOpenAI notebook has been updated with examples.

Maintainers: @hwchase17, @baskaryan

Twitter handle: @jjczopek

---------

Co-authored-by: Jerzy Czopek <jerzy.czopek@avanade.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Jerzy Czopek
2023-08-09 19:56:15 +02:00
committed by GitHub
parent 269f85b7b7
commit 539672a7fd
5 changed files with 274 additions and 6 deletions

View File

@@ -74,6 +74,124 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []
},
{
"cell_type": "markdown",
"id": "f27fa24d",
"metadata": {},
"source": [
"## Model Version\n",
"Azure OpenAI responses contain a `model` property, which is the name of the model used to generate the response. However, unlike native OpenAI responses, it does not contain the version of the model, which is set on the deployment in Azure. This makes it tricky to know which version of the model was used to generate the response, which can lead to, e.g., a wrong total cost calculation with `OpenAICallbackHandler`.\n",
"\n",
"To solve this problem, you can pass the `model_version` parameter to the `AzureChatOpenAI` class; it will be appended to the model name in the llm output. This way you can easily distinguish between different versions of the model."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0531798a",
"metadata": {},
"outputs": [],
"source": [
"from langchain.callbacks import get_openai_callback"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "3fd97dfc",
"metadata": {},
"outputs": [],
"source": [
"BASE_URL = \"https://{endpoint}.openai.azure.com\"\n",
"API_KEY = \"...\"\n",
"DEPLOYMENT_NAME = \"gpt-35-turbo\" # in Azure, this deployment has version 0613 - input and output tokens are counted separately"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "aceddb72",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Cost (USD): $0.000054\n"
]
}
],
"source": [
"model = AzureChatOpenAI(\n",
" openai_api_base=BASE_URL,\n",
" openai_api_version=\"2023-05-15\",\n",
" deployment_name=DEPLOYMENT_NAME,\n",
" openai_api_key=API_KEY,\n",
" openai_api_type=\"azure\",\n",
")\n",
"with get_openai_callback() as cb:\n",
" model(\n",
" [\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to French. I love programming.\"\n",
" )\n",
" ]\n",
" )\n",
" print(f\"Total Cost (USD): ${format(cb.total_cost, '.6f')}\") # without specifying the model version, flat-rate 0.002 USD per 1k input and output tokens is used\n"
]
},
{
"cell_type": "markdown",
"id": "2e61eefd",
"metadata": {},
"source": [
"We can provide the model version to the `AzureChatOpenAI` constructor. It will be appended to the model name returned by Azure OpenAI, and the cost will be calculated correctly."
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "8d5e54e9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Cost (USD): $0.000044\n"
]
}
],
"source": [
"model0613 = AzureChatOpenAI(\n",
" openai_api_base=BASE_URL,\n",
" openai_api_version=\"2023-05-15\",\n",
" deployment_name=DEPLOYMENT_NAME,\n",
" openai_api_key=API_KEY,\n",
" openai_api_type=\"azure\",\n",
" model_version=\"0613\"\n",
")\n",
"with get_openai_callback() as cb:\n",
" model0613(\n",
" [\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to French. I love programming.\"\n",
" )\n",
" ]\n",
" )\n",
" print(f\"Total Cost (USD): ${format(cb.total_cost, '.6f')}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99682534",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
@@ -92,7 +210,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.9.1" "version": "3.8.10"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -31,8 +31,19 @@ MODEL_COST_PER_1K_TOKENS = {
"gpt-3.5-turbo-0613-completion": 0.002, "gpt-3.5-turbo-0613-completion": 0.002,
"gpt-3.5-turbo-16k-completion": 0.004, "gpt-3.5-turbo-16k-completion": 0.004,
"gpt-3.5-turbo-16k-0613-completion": 0.004, "gpt-3.5-turbo-16k-0613-completion": 0.004,
# Azure GPT-35 input
"gpt-35-turbo": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613": 0.0015,
"gpt-35-turbo-16k": 0.003,
"gpt-35-turbo-16k-0613": 0.003,
# Azure GPT-35 output
"gpt-35-turbo-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613-completion": 0.002,
"gpt-35-turbo-16k-completion": 0.004,
"gpt-35-turbo-16k-0613-completion": 0.004,
# Others # Others
"gpt-35-turbo": 0.002, # Azure OpenAI version of ChatGPT
"text-ada-001": 0.0004, "text-ada-001": 0.0004,
"ada": 0.0004, "ada": 0.0004,
"text-babbage-001": 0.0005, "text-babbage-001": 0.0005,
@@ -69,7 +80,9 @@ def standardize_model_name(
if "ft-" in model_name: if "ft-" in model_name:
return model_name.split(":")[0] + "-finetuned" return model_name.split(":")[0] + "-finetuned"
elif is_completion and ( elif is_completion and (
model_name.startswith("gpt-4") or model_name.startswith("gpt-3.5") model_name.startswith("gpt-4")
or model_name.startswith("gpt-3.5")
or model_name.startswith("gpt-35")
): ):
return model_name + "-completion" return model_name + "-completion"
else: else:

View File

@@ -40,11 +40,20 @@ class AzureChatOpenAI(ChatOpenAI):
Be aware the API version may change. Be aware the API version may change.
You can also specify the version of the model using ``model_version`` constructor
parameter, as Azure OpenAI doesn't return model version with the response.
Default is empty. When you specify the version, it will be appended to the
model name in the response. Setting correct version will help you to calculate the
cost properly. Model version is not validated, so make sure you set it correctly
to get the correct cost.
Any parameters that are valid to be passed to the openai.create call can be passed Any parameters that are valid to be passed to the openai.create call can be passed
in, even if not explicitly saved on this class. in, even if not explicitly saved on this class.
""" """
deployment_name: str = "" deployment_name: str = ""
model_version: str = ""
openai_api_type: str = "" openai_api_type: str = ""
openai_api_base: str = "" openai_api_base: str = ""
openai_api_version: str = "" openai_api_version: str = ""
@@ -137,7 +146,19 @@ class AzureChatOpenAI(ChatOpenAI):
for res in response["choices"]: for res in response["choices"]:
if res.get("finish_reason", None) == "content_filter": if res.get("finish_reason", None) == "content_filter":
raise ValueError( raise ValueError(
"Azure has not provided the response due to a content" "Azure has not provided the response due to a content filter "
" filter being triggered" "being triggered"
) )
return super()._create_chat_result(response) chat_result = super()._create_chat_result(response)
if "model" in response:
model = response["model"]
if self.model_version:
model = f"{model}-{self.model_version}"
if chat_result.llm_output is not None and isinstance(
chat_result.llm_output, dict
):
chat_result.llm_output["model_name"] = model
return chat_result

View File

@@ -60,3 +60,67 @@ def test_on_llm_end_finetuned_model(handler: OpenAICallbackHandler) -> None:
) )
handler.on_llm_end(response) handler.on_llm_end(response)
assert handler.total_cost > 0 assert handler.total_cost > 0
@pytest.mark.parametrize(
    "model_name,expected_cost",
    [
        ("gpt-35-turbo", 0.0035),
        ("gpt-35-turbo-0301", 0.0035),
        ("gpt-35-turbo-0613", 0.0035),
        ("gpt-35-turbo-16k-0613", 0.007),
        ("gpt-35-turbo-16k", 0.007),
        ("gpt-4", 0.09),
        ("gpt-4-0314", 0.09),
        ("gpt-4-0613", 0.09),
        ("gpt-4-32k", 0.18),
        ("gpt-4-32k-0314", 0.18),
        ("gpt-4-32k-0613", 0.18),
    ],
)
def test_on_llm_end_azure_openai(
    handler: OpenAICallbackHandler, model_name: str, expected_cost: float
) -> None:
    """Check the total cost reported for Azure OpenAI model names.

    Every case feeds the handler an ``LLMResult`` that reports 1000 prompt
    tokens and 1000 completion tokens for ``model_name``.

    Args:
        handler: Callback handler under test (supplied as a fixture).
        model_name: Model name placed in ``llm_output["model_name"]``.
        expected_cost: Total cost expected for the reported token usage.
    """
    response = LLMResult(
        generations=[],
        llm_output={
            "token_usage": {
                "prompt_tokens": 1000,
                "completion_tokens": 1000,
                "total_tokens": 2000,
            },
            "model_name": model_name,
        },
    )
    handler.on_llm_end(response)
    # Costs are floats, so compare with a tolerance instead of exact equality;
    # an exact ``==`` can fail on harmless rounding differences.
    assert handler.total_cost == pytest.approx(expected_cost)
@pytest.mark.parametrize(
    "model_name", ["gpt-35-turbo-16k-0301", "gpt-4-0301", "gpt-4-32k-0301"]
)
def test_on_llm_end_no_cost_invalid_model(
    handler: OpenAICallbackHandler, model_name: str
) -> None:
    """Check that the parametrized model names yield a total cost of zero.

    The handler receives a result reporting 1000 prompt tokens and 1000
    completion tokens; ``handler.total_cost`` must be 0 afterwards.
    """
    usage = {
        "prompt_tokens": 1000,
        "completion_tokens": 1000,
        "total_tokens": 2000,
    }
    llm_result = LLMResult(
        generations=[],
        llm_output={"token_usage": usage, "model_name": model_name},
    )
    handler.on_llm_end(llm_result)
    assert handler.total_cost == 0

View File

@@ -0,0 +1,52 @@
import json
import os
from typing import Any, Mapping, cast
import pytest
from langchain.chat_models.azure_openai import AzureChatOpenAI
# Placeholder OpenAI settings, written into the process environment when this
# module is imported.
os.environ.update(
    {
        "OPENAI_API_KEY": "test",
        "OPENAI_API_BASE": "https://oai.azure.com/",
        "OPENAI_API_VERSION": "2023-05-01",
    }
)
@pytest.mark.requires("openai")
@pytest.mark.parametrize(
    "model_name", ["gpt-4", "gpt-4-32k", "gpt-35-turbo", "gpt-35-turbo-16k"]
)
def test_model_name_set_on_chat_result_when_present_in_response(
    model_name: str,
) -> None:
    """Check that the response's ``model`` value ends up in ``llm_output``.

    A sample chat-completion payload whose ``model`` field is ``model_name``
    is passed to ``AzureChatOpenAI._create_chat_result``; the resulting
    ``llm_output["model_name"]`` must equal ``model_name``.
    """
    sample_response_text = f"""
    {{
        "id": "chatcmpl-7ryweq7yc8463fas879t9hdkkdf",
        "object": "chat.completion",
        "created": 1690381189,
        "model": "{model_name}",
        "choices": [
            {{
                "index": 0,
                "finish_reason": "stop",
                "message": {{
                    "role": "assistant",
                    "content": "I'm an AI assistant that can help you."
                }}
            }}
        ],
        "usage": {{
            "completion_tokens": 28,
            "prompt_tokens": 15,
            "total_tokens": 43
        }}
    }}
    """
    # Parse the JSON text and present it to the model as Mapping[str, Any].
    parsed = json.loads(sample_response_text)
    response = cast(Mapping[str, Any], parsed)

    chat = AzureChatOpenAI()
    llm_output = chat._create_chat_result(response).llm_output
    assert llm_output is not None and llm_output["model_name"] == model_name