Mirror of https://github.com/hwchase17/langchain.git
Fix wrong token counts from get_num_tokens for OpenAI LLMs (#2952)
The encoding fetch was out of date. Luckily OpenAI has a nice [`encoding_for_model`](46287bfa49/tiktoken/model.py) function in `tiktoken` we can use now.
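For reference, `encoding_for_model` maps a model name to its tokenizer, so the encoding no longer has to be maintained by hand in LangChain. A minimal sketch (the model and encoding names below are tiktoken's own and are illustrative, not part of this commit):

```python
import tiktoken

# Resolve the tokenizer from the model name instead of hard-coding it.
enc = tiktoken.encoding_for_model("text-davinci-003")
print(enc.name)                           # the encoding tiktoken picked, e.g. "p50k_base"
print(len(enc.encode("token counting")))  # number of tokens in the string
```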
This commit is contained in:
parent fea5619ce9
commit b9db20481f
@@ -446,15 +446,9 @@ class BaseOpenAI(BaseLLM):
                 "This is needed in order to calculate get_num_tokens. "
                 "Please install it with `pip install tiktoken`."
             )
-        encoder = "gpt2"
-        if self.model_name in ("text-davinci-003", "text-davinci-002"):
-            encoder = "p50k_base"
-        if self.model_name.startswith("code"):
-            encoder = "p50k_base"
-        # create a GPT-3 encoder instance
-        enc = tiktoken.get_encoding(encoder)
 
-        # encode the text using the GPT-3 encoder
+        enc = tiktoken.encoding_for_model(self.model_name)
+
         tokenized_text = enc.encode(text)
 
         # calculate the number of tokens in the encoded text