langchain/libs/community/tests/unit_tests/retrievers/test_nanopq.py
Vishnu Nandakumar e271965d1e
community: retrievers: added capability for using Product Quantization as one of the retrievers. (#22424)
- [ ] **Community**: "Retrievers: Product Quantization"
- [X] This PR adds a Product Quantization (PQ) retriever to LangChain
Community. PQ is one of the fastest retrieval methods when the
embeddings are rich enough in context, because it quantizes the vectors
and represents them through centroids.
    - **Description:** Adding PQ as one of the retrievers
    - **Dependencies:** using the package nanopq for this PR
    - **Twitter handle:** vishnunkumar_


- [X] **Add tests and docs**: If you're adding a new integration, please
include
   - [X] Added unit tests for the same in the retrievers.
   - [ ] Will add an example notebook subsequently

- [X] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/ -
done the same

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-07-24 13:52:15 +00:00

42 lines
1.6 KiB
Python

import pytest
from langchain_core.documents import Document
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.retrievers import NanoPQRetriever
class TestNanoPQRetriever:
    """Unit tests for building and querying ``NanoPQRetriever``.

    Every test is marked ``requires("nanopq")`` so it is only meaningful
    when the optional ``nanopq`` dependency is available.
    """

    @pytest.mark.requires("nanopq")
    def test_from_texts(self) -> None:
        """``from_texts`` keeps one stored text per input string."""
        input_texts = ["I have a pen.", "Do you have a pen?", "I have a bag."]
        pq_retriever = NanoPQRetriever.from_texts(
            texts=input_texts, embeddings=FakeEmbeddings(size=100)
        )
        assert len(pq_retriever.texts) == 3

    @pytest.mark.requires("nanopq")
    def test_from_documents(self) -> None:
        """``from_documents`` preserves page contents and metadata in order."""
        input_docs = [
            Document(page_content="I have a pen.", metadata={"page": 1}),
            Document(page_content="Do you have a pen?", metadata={"page": 2}),
            Document(page_content="I have a bag.", metadata={"page": 3}),
        ]
        pq_retriever = NanoPQRetriever.from_documents(
            documents=input_docs, embeddings=FakeEmbeddings(size=100)
        )
        assert pq_retriever.texts == [
            "I have a pen.",
            "Do you have a pen?",
            "I have a bag.",
        ]
        assert pq_retriever.metadatas == [{"page": 1}, {"page": 2}, {"page": 3}]

    # Renamed from ``invalid_subspace_error``: without the ``test_`` prefix
    # pytest's default discovery never collected this method, so the check
    # below was silently skipped.
    @pytest.mark.requires("nanopq")
    def test_invalid_subspace_error(self) -> None:
        """Invoking a retriever built on 43-dimensional embeddings raises.

        NOTE(review): presumably 43 cannot be split evenly into the
        retriever's default number of subspaces -- confirm against the
        ``NanoPQRetriever`` implementation.
        """
        input_texts = ["I have a pen.", "Do you have a pen?", "I have a bag."]
        pq_retriever = NanoPQRetriever.from_texts(
            texts=input_texts, embeddings=FakeEmbeddings(size=43)
        )
        # Construction is expected to succeed; only the query should fail.
        with pytest.raises(RuntimeError):
            pq_retriever.invoke("I have")