mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-15 22:44:36 +00:00
community: Add Laser Embedding Integration (#18111)
- **Description:** Added integration with Meta AI's LASER (Language-Agnostic SEntence Representations) embedding library, which supports multilingual embedding for any of the languages listed here: https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200, including several low-resource languages. - **Dependencies:** laser_encoders
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
"""Test LASER embeddings."""
|
||||
import pytest
|
||||
|
||||
from langchain_community.embeddings.laser import LaserEmbeddings
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::UserWarning:")
@pytest.mark.parametrize("lang", [None, "lus_Latn", "english"])
def test_laser_embedding_documents(lang: str) -> None:
    """Embed a two-document batch with LASER and check the result's shape.

    Runs once per parametrized ``lang`` value (``None``, ``"lus_Latn"`` and
    ``"english"``). ``UserWarning`` is filtered out because the LASER library
    implementation raises one, which is irrelevant to what is tested here.
    """
    texts = ["hello", "world"]
    vectors = LaserEmbeddings(lang=lang).embed_documents(texts)
    # One vector per input document, each with 1024 components.
    assert len(vectors) == 2  # type: ignore[arg-type]
    assert len(vectors[0]) == 1024  # type: ignore[index]
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::UserWarning:")
@pytest.mark.parametrize("lang", [None, "lus_Latn", "english"])
def test_laser_embedding_query(lang: str) -> None:
    """Embed a single query string with LASER and check the vector length.

    Runs once per parametrized ``lang`` value (``None``, ``"lus_Latn"`` and
    ``"english"``). ``UserWarning`` is filtered out because the LASER library
    implementation raises one, which is irrelevant to what is tested here.
    """
    text = "hello world"
    encoder = LaserEmbeddings(lang=lang)
    vector = encoder.embed_query(text)
    # The query embedding is expected to have 1024 components.
    assert len(vector) == 1024
|
Reference in New Issue
Block a user