mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-18 16:16:33 +00:00
encoding_kwargs
for InstructEmbeddings (#5450)
# What does this PR do?

Adds support for `encode_kwargs` to `HuggingFaceInstructEmbeddings`, updates the docstring example, and adds a test that illustrates the feature with `normalize_embeddings`.

Fixes #3605 (similar to #3914).

Use case:

```python
from langchain.embeddings import HuggingFaceInstructEmbeddings

model_name = "hkunlp/instructor-large"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceInstructEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
```
This commit is contained in:
@@ -26,7 +26,8 @@ def test_huggingface_embedding_query() -> None:
|
||||
def test_huggingface_instructor_embedding_documents() -> None:
|
||||
"""Test huggingface embeddings."""
|
||||
documents = ["foo bar"]
|
||||
embedding = HuggingFaceInstructEmbeddings()
|
||||
model_name = "hkunlp/instructor-base"
|
||||
embedding = HuggingFaceInstructEmbeddings(model_name=model_name)
|
||||
output = embedding.embed_documents(documents)
|
||||
assert len(output) == 1
|
||||
assert len(output[0]) == 768
|
||||
@@ -35,6 +36,22 @@ def test_huggingface_instructor_embedding_documents() -> None:
|
||||
def test_huggingface_instructor_embedding_query() -> None:
|
||||
"""Test huggingface embeddings."""
|
||||
query = "foo bar"
|
||||
embedding = HuggingFaceInstructEmbeddings()
|
||||
model_name = "hkunlp/instructor-base"
|
||||
embedding = HuggingFaceInstructEmbeddings(model_name=model_name)
|
||||
output = embedding.embed_query(query)
|
||||
assert len(output) == 768
|
||||
|
||||
|
||||
def test_huggingface_instructor_embedding_normalize() -> None:
|
||||
"""Test huggingface embeddings."""
|
||||
query = "foo bar"
|
||||
model_name = "hkunlp/instructor-base"
|
||||
encode_kwargs = {"normalize_embeddings": True}
|
||||
embedding = HuggingFaceInstructEmbeddings(
|
||||
model_name=model_name, encode_kwargs=encode_kwargs
|
||||
)
|
||||
output = embedding.embed_query(query)
|
||||
assert len(output) == 768
|
||||
eps = 1e-5
|
||||
norm = sum([o**2 for o in output])
|
||||
assert abs(1 - norm) <= eps
|
||||
|
Reference in New Issue
Block a user