mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 15:04:13 +00:00
FAISS and embedding support (#48)
also adds embeddings and an in memory docstore
This commit is contained in:
1
tests/integration_tests/embeddings/__init__.py
Normal file
1
tests/integration_tests/embeddings/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Test embedding integrations."""
|
19
tests/integration_tests/embeddings/test_openai.py
Normal file
19
tests/integration_tests/embeddings/test_openai.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Test openai embeddings."""
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
|
||||
|
||||
def test_openai_embedding_documents() -> None:
    """Embed a one-element list of texts and check the shape of the result.

    Expects exactly one embedding back, and expects that embedding to have
    2048 components.
    """
    texts = ["foo bar"]
    vectors = OpenAIEmbeddings().embed_documents(texts)
    # One vector per input text.
    assert len(vectors) == 1
    assert len(vectors[0]) == 2048
|
||||
|
||||
|
||||
def test_openai_embedding_query() -> None:
    """Embed a single query string and check the vector has 2048 components."""
    query = "foo bar"
    embedder = OpenAIEmbeddings()
    vector = embedder.embed_query(query)
    assert len(vector) == 2048
|
47
tests/integration_tests/test_faiss.py
Normal file
47
tests/integration_tests/test_faiss.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""Test FAISS functionality."""
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.docstore.in_memory import InMemoryDocstore
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.faiss import FAISS
|
||||
|
||||
|
||||
class FakeEmbeddings(Embeddings):
    """Deterministic stand-in for an embedding model, for use in tests.

    Every vector produced has 10 components and depends only on the position
    of a text in the input list, never on the text's content.
    """

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one vector per text, each filled with that text's index.

        The first text maps to ten 0s, the second to ten 1s, and so on.
        """
        return [[position] * 10 for position, _ in enumerate(texts)]

    def embed_query(self, text: str) -> List[float]:
        """Return a vector of ten 0s, ignoring ``text``.

        This is the same vector ``embed_documents`` assigns to the first text
        in its input list.
        """
        zero_vector = [0] * 10
        return zero_vector
|
||||
|
||||
|
||||
def test_faiss() -> None:
    """Build an index from texts, then check its docstore and a top-1 search."""
    texts = ["foo", "bar", "baz"]
    docsearch = FAISS.from_texts(texts, FakeEmbeddings())

    # The docstore is expected to hold each text as a Document, keyed by the
    # text's position rendered as a string.
    expected_documents = {
        "0": Document(page_content="foo"),
        "1": Document(page_content="bar"),
        "2": Document(page_content="baz"),
    }
    expected_docstore = InMemoryDocstore(expected_documents)
    assert docsearch.docstore.__dict__ == expected_docstore.__dict__

    results = docsearch.similarity_search("foo", k=1)
    assert results == [Document(page_content="foo")]
|
||||
|
||||
|
||||
def test_faiss_search_not_found() -> None:
    """Check that a search raises ValueError when the docstore is empty."""
    docsearch = FAISS.from_texts(["foo", "bar", "baz"], FakeEmbeddings())
    # Replace the populated docstore with an empty one to force the failure.
    empty_docstore = InMemoryDocstore({})
    docsearch.docstore = empty_docstore
    with pytest.raises(ValueError):
        docsearch.similarity_search("foo")
|
21
tests/unit_tests/docstore/test_inmemory.py
Normal file
21
tests/unit_tests/docstore/test_inmemory.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""Test in memory docstore."""
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.docstore.in_memory import InMemoryDocstore
|
||||
|
||||
|
||||
def test_document_found() -> None:
    """Check that searching for a stored key returns the matching Document."""
    docstore = InMemoryDocstore({"foo": Document(page_content="bar")})
    result = docstore.search("foo")
    assert isinstance(result, Document)
    assert result.page_content == "bar"
|
||||
|
||||
|
||||
def test_document_not_found() -> None:
    """Check that searching for a missing key returns a not-found message."""
    docstore = InMemoryDocstore({"foo": Document(page_content="bar")})
    # "bar" is a stored document's content, not a key, so the lookup misses.
    result = docstore.search("bar")
    assert result == "ID bar not found."
|
Reference in New Issue
Block a user