mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-29 11:09:07 +00:00
Newlines should be removed only for the 1st-gen embedding models (#3853)
Only 1st-generation OpenAI embedding models are negatively impacted by newlines. Context: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500
This commit is contained in:
parent
6bd367916c
commit
a5a4999fb7
@ -158,8 +158,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
|||||||
indices = []
|
indices = []
|
||||||
encoding = tiktoken.model.encoding_for_model(self.model)
|
encoding = tiktoken.model.encoding_for_model(self.model)
|
||||||
for i, text in enumerate(texts):
|
for i, text in enumerate(texts):
|
||||||
# replace newlines, which can negatively affect performance.
|
if self.model.endswith("001"):
|
||||||
text = text.replace("\n", " ")
|
# See: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500
|
||||||
|
# replace newlines, which can negatively affect performance.
|
||||||
|
text = text.replace("\n", " ")
|
||||||
token = encoding.encode(
|
token = encoding.encode(
|
||||||
text,
|
text,
|
||||||
allowed_special=self.allowed_special,
|
allowed_special=self.allowed_special,
|
||||||
@ -212,8 +214,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
|||||||
if len(text) > self.embedding_ctx_length:
|
if len(text) > self.embedding_ctx_length:
|
||||||
return self._get_len_safe_embeddings([text], engine=engine)[0]
|
return self._get_len_safe_embeddings([text], engine=engine)[0]
|
||||||
else:
|
else:
|
||||||
# replace newlines, which can negatively affect performance.
|
if self.model.endswith("001"):
|
||||||
text = text.replace("\n", " ")
|
# See: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500
|
||||||
|
# replace newlines, which can negatively affect performance.
|
||||||
|
text = text.replace("\n", " ")
|
||||||
return embed_with_retry(self, input=[text], engine=engine)["data"][0][
|
return embed_with_retry(self, input=[text], engine=engine)["data"][0][
|
||||||
"embedding"
|
"embedding"
|
||||||
]
|
]
|
||||||
|
Loading…
Reference in New Issue
Block a user