community: Add Laser Embedding Integration (#18111)

- **Description:** Added an integration with Meta AI's LASER
(Language-Agnostic SEntence Representations) embedding library, which
supports multilingual embeddings for any of the languages listed here:
https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200,
including several low-resource languages.
- **Dependencies:** laser_encoders
This commit is contained in:
Dan Stambler
2024-02-26 15:16:37 -05:00
committed by GitHub
parent 257879e98d
commit 69344a0661
5 changed files with 270 additions and 0 deletions

View File

@@ -0,0 +1,29 @@
"""Test LASER embeddings."""
from typing import Optional

import pytest

from langchain_community.embeddings.laser import LaserEmbeddings
@pytest.mark.filterwarnings("ignore::UserWarning:")
@pytest.mark.parametrize("lang", [None, "lus_Latn", "english"])
def test_laser_embedding_documents(lang: Optional[str]) -> None:
    """Test LASER embeddings for documents.

    Embeds two short documents and checks that one result is returned per
    document and that the first result has 1024 entries.

    The ``UserWarning`` filter is applied because the LASER library
    implementation emits a user warning, which is ignored in testing.

    Args:
        lang: Language value forwarded to ``LaserEmbeddings``. The
            parametrized cases include ``None``, hence ``Optional[str]``.
    """
    documents = ["hello", "world"]
    embedding = LaserEmbeddings(lang=lang)
    output = embedding.embed_documents(documents)
    # One result per input document.
    assert len(output) == 2  # type: ignore[arg-type]
    # Each result is expected to have 1024 entries.
    assert len(output[0]) == 1024  # type: ignore[index]
@pytest.mark.filterwarnings("ignore::UserWarning:")
@pytest.mark.parametrize("lang", [None, "lus_Latn", "english"])
def test_laser_embedding_query(lang: Optional[str]) -> None:
    """Test LASER embeddings for a query.

    Embeds a single query string and checks that the result has 1024
    entries.

    The ``UserWarning`` filter is applied because the LASER library
    implementation emits a user warning, which is ignored in testing.

    Args:
        lang: Language value forwarded to ``LaserEmbeddings``. The
            parametrized cases include ``None``, hence ``Optional[str]``.
    """
    query = "hello world"
    embedding = LaserEmbeddings(lang=lang)
    output = embedding.embed_query(query)
    assert len(output) == 1024