mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-25 12:44:04 +00:00 
			
		
		
		
	**Description:** Added the new gpt-3.5-turbo-1106 model to the **fine-tuned** cost calculation. **Issue:** no open issue found. According to OpenAI's pricing information, the price is the same as for the older model (0613).
		
			
				
	
	
		
			227 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			227 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Callback Handler that prints to std out."""
 | |
| import threading
 | |
| from typing import Any, Dict, List
 | |
| 
 | |
| from langchain_core.callbacks import BaseCallbackHandler
 | |
| from langchain_core.outputs import LLMResult
 | |
| 
 | |
# Per-family price tables. Keys are standardized model names; keys ending in
# "-completion" hold the output-token price for the same model.

# GPT-4 input
_GPT4_PROMPT_COSTS = {
    "gpt-4": 0.03,
    "gpt-4-0314": 0.03,
    "gpt-4-0613": 0.03,
    "gpt-4-32k": 0.06,
    "gpt-4-32k-0314": 0.06,
    "gpt-4-32k-0613": 0.06,
    "gpt-4-vision-preview": 0.01,
    "gpt-4-1106-preview": 0.01,
}

# GPT-4 output
_GPT4_COMPLETION_COSTS = {
    "gpt-4-completion": 0.06,
    "gpt-4-0314-completion": 0.06,
    "gpt-4-0613-completion": 0.06,
    "gpt-4-32k-completion": 0.12,
    "gpt-4-32k-0314-completion": 0.12,
    "gpt-4-32k-0613-completion": 0.12,
    "gpt-4-vision-preview-completion": 0.03,
    "gpt-4-1106-preview-completion": 0.03,
}

# GPT-3.5 input
_GPT35_PROMPT_COSTS = {
    "gpt-3.5-turbo": 0.0015,
    "gpt-3.5-turbo-0301": 0.0015,
    "gpt-3.5-turbo-0613": 0.0015,
    "gpt-3.5-turbo-1106": 0.001,
    "gpt-3.5-turbo-instruct": 0.0015,
    "gpt-3.5-turbo-16k": 0.003,
    "gpt-3.5-turbo-16k-0613": 0.003,
}

# GPT-3.5 output
_GPT35_COMPLETION_COSTS = {
    "gpt-3.5-turbo-completion": 0.002,
    "gpt-3.5-turbo-0301-completion": 0.002,
    "gpt-3.5-turbo-0613-completion": 0.002,
    "gpt-3.5-turbo-1106-completion": 0.002,
    "gpt-3.5-turbo-instruct-completion": 0.002,
    "gpt-3.5-turbo-16k-completion": 0.004,
    "gpt-3.5-turbo-16k-0613-completion": 0.004,
}

# Azure GPT-35 input ("gpt-35-turbo" is the Azure OpenAI version of ChatGPT)
_AZURE_GPT35_PROMPT_COSTS = {
    "gpt-35-turbo": 0.0015,
    "gpt-35-turbo-0301": 0.0015,
    "gpt-35-turbo-0613": 0.0015,
    "gpt-35-turbo-instruct": 0.0015,
    "gpt-35-turbo-16k": 0.003,
    "gpt-35-turbo-16k-0613": 0.003,
}

# Azure GPT-35 output
_AZURE_GPT35_COMPLETION_COSTS = {
    "gpt-35-turbo-completion": 0.002,
    "gpt-35-turbo-0301-completion": 0.002,
    "gpt-35-turbo-0613-completion": 0.002,
    "gpt-35-turbo-instruct-completion": 0.002,
    "gpt-35-turbo-16k-completion": 0.004,
    "gpt-35-turbo-16k-0613-completion": 0.004,
}

# Other models: a single entry per model, no separate "-completion" key
_OTHER_MODEL_COSTS = {
    "text-ada-001": 0.0004,
    "ada": 0.0004,
    "text-babbage-001": 0.0005,
    "babbage": 0.0005,
    "text-curie-001": 0.002,
    "curie": 0.002,
    "text-davinci-003": 0.02,
    "text-davinci-002": 0.02,
    "code-davinci-002": 0.02,
}

# Fine-tuned input
_FINETUNED_PROMPT_COSTS = {
    "babbage-002-finetuned": 0.0016,
    "davinci-002-finetuned": 0.012,
    "gpt-3.5-turbo-0613-finetuned": 0.012,
    "gpt-3.5-turbo-1106-finetuned": 0.012,
}

# Fine-tuned output
_FINETUNED_COMPLETION_COSTS = {
    "babbage-002-finetuned-completion": 0.0016,
    "davinci-002-finetuned-completion": 0.012,
    "gpt-3.5-turbo-0613-finetuned-completion": 0.016,
    "gpt-3.5-turbo-1106-finetuned-completion": 0.016,
}

# Azure fine-tuned input
_AZURE_FINETUNED_PROMPT_COSTS = {
    "babbage-002-azure-finetuned": 0.0004,
    "davinci-002-azure-finetuned": 0.002,
    "gpt-35-turbo-0613-azure-finetuned": 0.0015,
}

# Azure fine-tuned output
_AZURE_FINETUNED_COMPLETION_COSTS = {
    "babbage-002-azure-finetuned-completion": 0.0004,
    "davinci-002-azure-finetuned-completion": 0.002,
    "gpt-35-turbo-0613-azure-finetuned-completion": 0.002,
}

# Legacy fine-tuned models: a single entry per model
_LEGACY_FINETUNED_COSTS = {
    "ada-finetuned-legacy": 0.0016,
    "babbage-finetuned-legacy": 0.0024,
    "curie-finetuned-legacy": 0.012,
    "davinci-finetuned-legacy": 0.12,
}

# Combined lookup table. The merge order below fixes the key iteration order,
# which is visible wherever the known model names are listed.
MODEL_COST_PER_1K_TOKENS = {
    **_GPT4_PROMPT_COSTS,
    **_GPT4_COMPLETION_COSTS,
    **_GPT35_PROMPT_COSTS,
    **_GPT35_COMPLETION_COSTS,
    **_AZURE_GPT35_PROMPT_COSTS,
    **_AZURE_GPT35_COMPLETION_COSTS,
    **_OTHER_MODEL_COSTS,
    **_FINETUNED_PROMPT_COSTS,
    **_FINETUNED_COMPLETION_COSTS,
    **_AZURE_FINETUNED_PROMPT_COSTS,
    **_AZURE_FINETUNED_COMPLETION_COSTS,
    **_LEGACY_FINETUNED_COSTS,
}
 | |
| 
 | |
| 
 | |
def standardize_model_name(
    model_name: str,
    is_completion: bool = False,
) -> str:
    """
    Normalize a raw model identifier into a canonical, lower-cased name.

    Fine-tuned identifiers are collapsed onto their base model plus a
    marker suffix (``-azure-finetuned``, ``-finetuned-legacy`` or
    ``-finetuned``). When ``is_completion`` is set, ``-completion`` is
    appended for names starting with ``gpt-4``, ``gpt-3.5`` or ``gpt-35``
    and for non-legacy fine-tuned names.

    Args:
        model_name: Model name to standardize.
        is_completion: Whether the name is being resolved for completion
            tokens. Defaults to False.

    Returns:
        Standardized model name.

    """
    name = model_name.lower()

    # The three rewrites are applied one after another (not as alternatives):
    # a name may match more than one marker.
    if ".ft-" in name:
        # "<base>.ft-<id>" -> "<base>-azure-finetuned"
        name = name.partition(".ft-")[0] + "-azure-finetuned"
    if ":ft-" in name:
        # "<base>:ft-<id>" -> "<base>-finetuned-legacy"
        name = name.partition(":")[0] + "-finetuned-legacy"
    if "ft:" in name:
        # "ft:<base>:<...>" -> "<base>-finetuned"
        name = name.split(":", 2)[1] + "-finetuned"

    if not is_completion:
        return name

    takes_completion_suffix = name.startswith(("gpt-4", "gpt-3.5", "gpt-35")) or (
        "finetuned" in name and "legacy" not in name
    )
    if takes_completion_suffix:
        return name + "-completion"
    return name
 | |
| 
 | |
| 
 | |
def get_openai_token_cost_for_model(
    model_name: str, num_tokens: int, is_completion: bool = False
) -> float:
    """
    Get the cost in USD for a given model and number of tokens.

    The model name is first passed through ``standardize_model_name`` and
    the result is looked up in ``MODEL_COST_PER_1K_TOKENS``.

    Args:
        model_name: Name of the model
        num_tokens: Number of tokens.
        is_completion: Whether the model is used for completion or not.
            Defaults to False.

    Returns:
        Cost in USD.

    Raises:
        ValueError: If the standardized model name has no entry in
            ``MODEL_COST_PER_1K_TOKENS``.
    """
    model_name = standardize_model_name(model_name, is_completion=is_completion)
    if model_name not in MODEL_COST_PER_1K_TOKENS:
        # Each literal ends with a space so the sentences do not run together
        # once the adjacent strings are concatenated.
        raise ValueError(
            f"Unknown model: {model_name}. "
            "Please provide a valid OpenAI model name. "
            "Known models are: " + ", ".join(MODEL_COST_PER_1K_TOKENS)
        )
    # The table stores prices per 1,000 tokens.
    return MODEL_COST_PER_1K_TOKENS[model_name] * (num_tokens / 1000)
 | |
| 
 | |
| 
 | |
class OpenAICallbackHandler(BaseCallbackHandler):
    """Callback handler that accumulates OpenAI token usage and cost."""

    # Running totals, updated by ``on_llm_end``.
    total_tokens: int = 0
    prompt_tokens: int = 0
    completion_tokens: int = 0
    successful_requests: int = 0
    total_cost: float = 0.0

    def __init__(self) -> None:
        super().__init__()
        # Held while the running totals are updated in ``on_llm_end``.
        self._lock = threading.Lock()

    def __repr__(self) -> str:
        summary = (
            f"Tokens Used: {self.total_tokens}\n"
            f"\tPrompt Tokens: {self.prompt_tokens}\n"
            f"\tCompletion Tokens: {self.completion_tokens}\n"
            f"Successful Requests: {self.successful_requests}\n"
            f"Total Cost (USD): ${self.total_cost}"
        )
        return summary

    @property
    def always_verbose(self) -> bool:
        """Whether to call verbose callbacks even if verbose is False."""
        return True

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Do nothing; this handler records nothing when an LLM call starts."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing; this handler records nothing per streamed token."""

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Add the token usage and cost of a finished LLM call to the totals."""
        llm_output = response.llm_output
        if llm_output is None:
            return None

        if "token_usage" not in llm_output:
            # No usage data: only the request counter is advanced.
            with self._lock:
                self.successful_requests += 1
            return None

        # Work out this request's tokens and cost before taking the lock.
        usage = llm_output["token_usage"]
        n_completion = usage.get("completion_tokens", 0)
        n_prompt = usage.get("prompt_tokens", 0)
        model = standardize_model_name(llm_output.get("model_name", ""))

        # Models missing from the price table contribute zero cost.
        completion_cost: float = 0
        prompt_cost: float = 0
        if model in MODEL_COST_PER_1K_TOKENS:
            completion_cost = get_openai_token_cost_for_model(
                model, n_completion, is_completion=True
            )
            prompt_cost = get_openai_token_cost_for_model(model, n_prompt)

        # Apply all updates to the shared totals under the lock.
        with self._lock:
            self.total_cost += prompt_cost + completion_cost
            self.total_tokens += usage.get("total_tokens", 0)
            self.prompt_tokens += n_prompt
            self.completion_tokens += n_completion
            self.successful_requests += 1

    def __copy__(self) -> "OpenAICallbackHandler":
        """Return this same handler instance instead of a new copy."""
        return self

    def __deepcopy__(self, memo: Any) -> "OpenAICallbackHandler":
        """Return this same handler instance instead of a deep copy."""
        return self
 |