diff --git a/langchain/llms/openai.py b/langchain/llms/openai.py index 554d5ce4a2a..6d00fde52d8 100644 --- a/langchain/llms/openai.py +++ b/langchain/llms/openai.py @@ -277,8 +277,13 @@ class BaseOpenAI(BaseLLM, BaseModel): "This is needed in order to calculate get_num_tokens. " "Please it install it with `pip install tiktoken`." ) + encoder = "gpt2" + if self.model_name in ("text-davinci-003", "text-davinci-002"): + encoder = "p50k_base" + if self.model_name.startswith("code"): + encoder = "p50k_base" # create a GPT-3 encoder instance - enc = tiktoken.get_encoding("gpt2") + enc = tiktoken.get_encoding(encoder) # encode the text using the GPT-3 encoder tokenized_text = enc.encode(text)