mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-02 13:08:57 +00:00
- [ ] **Community**: "Retrievers: Product Quantization" - [X] This PR adds a Product Quantization (PQ) feature to the retrievers in the LangChain Community package. PQ is one of the fastest retrieval methods when the embeddings are rich enough in context, owing to quantization and the representation of vectors through centroids - **Description:** Adding PQ as one of the retrievers - **Dependencies:** uses the package nanopq for this PR - **Twitter handle:** vishnunkumar_ - [X] **Add tests and docs**: If you're adding a new integration, please include - [X] Added unit tests for the same in the retrievers. - [ ] Will add an example notebook subsequently - [X] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ - done the same --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: Chester Curme <chester.curme@gmail.com>
42 lines
1.6 KiB
Python
42 lines
1.6 KiB
Python
import pytest
|
|
from langchain_core.documents import Document
|
|
|
|
from langchain_community.embeddings import FakeEmbeddings
|
|
from langchain_community.retrievers import NanoPQRetriever
|
|
|
|
|
|
class TestNanoPQRetriever:
    """Unit tests for building and invoking ``NanoPQRetriever``.

    Every test carries the ``requires("nanopq")`` marker and uses
    ``FakeEmbeddings`` in place of a real embedding model.
    """

    @pytest.mark.requires("nanopq")
    def test_from_texts(self) -> None:
        """``from_texts`` exposes one stored text per input string."""
        input_texts = ["I have a pen.", "Do you have a pen?", "I have a bag."]
        pq_retriever = NanoPQRetriever.from_texts(
            texts=input_texts, embeddings=FakeEmbeddings(size=100)
        )
        assert len(pq_retriever.texts) == 3

    @pytest.mark.requires("nanopq")
    def test_from_documents(self) -> None:
        """``from_documents`` keeps page contents and metadata in input order."""
        input_docs = [
            Document(page_content="I have a pen.", metadata={"page": 1}),
            Document(page_content="Do you have a pen?", metadata={"page": 2}),
            Document(page_content="I have a bag.", metadata={"page": 3}),
        ]
        pq_retriever = NanoPQRetriever.from_documents(
            documents=input_docs, embeddings=FakeEmbeddings(size=100)
        )
        assert pq_retriever.texts == [
            "I have a pen.",
            "Do you have a pen?",
            "I have a bag.",
        ]
        assert pq_retriever.metadatas == [{"page": 1}, {"page": 2}, {"page": 3}]

    @pytest.mark.requires("nanopq")
    def test_invalid_subspace_error(self) -> None:
        """Invoking a retriever built on 43-dimensional embeddings raises.

        The method name must start with ``test`` so that pytest's default
        discovery collects it; without the prefix the check is never run.

        NOTE(review): presumably 43 is chosen because it is not divisible by
        the retriever's default subspace count -- confirm against
        ``NanoPQRetriever``.
        """
        input_texts = ["I have a pen.", "Do you have a pen?", "I have a bag."]
        pq_retriever = NanoPQRetriever.from_texts(
            texts=input_texts, embeddings=FakeEmbeddings(size=43)
        )
        # Construction is expected to succeed; the error is expected at query time.
        with pytest.raises(RuntimeError):
            pq_retriever.invoke("I have")
|