Initial Commit

parent 990a69d9d7
commit 841149bffa
libs/partners/coherence/.gitignore (vendored, new file)
@@ -0,0 +1,33 @@
# Python
__pycache__/
*.py[cod]
*.egg
*.egg-info/
dist/
build/
.eggs/

# Virtual environments
.venv/
.env/

# uv cache
.uv/

# Testing
htmlcov/
.cache/
.coverage
coverage.xml

# IDE
.idea/
.vscode/

# Logs
*.log

# OS
.DS_Store
Thumbs.db
libs/partners/coherence/Makefile (new file)
@@ -0,0 +1,19 @@
.PHONY: install lint format test clean

install:
	uv pip install -e .[lint,typing,test]

lint:
	ruff check langchain_coherence tests

format:
	ruff format langchain_coherence tests

fix: format lint

test:
	pytest

clean:
	rm -rf .pytest_cache .mypy_cache .ruff_cache .uv __pycache__ *.egg-info build dist
libs/partners/coherence/README.md (new file)
@@ -0,0 +1,15 @@
# LangChain Coherence Integration

This package integrates Oracle Coherence as a vector store in LangChain.

## Installation

```bash
pip install langchain_coherence
```

## Usage

```python
from langchain_coherence import CoherenceVectorStore
```
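The README stops at the bare import; the class docstring in `coherence_store.py` below carries the full walkthrough. Distilled from those docstring examples, a minimal end-to-end sketch (assuming a reachable Coherence server on the client's default endpoint, plus `langchain-huggingface` and `sentence-transformers` installed, as the test extras require):

```python
import asyncio

from coherence import NamedMap, Session
from langchain_core.documents import Document
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

from langchain_coherence import CoherenceVectorStore


async def main() -> None:
    # Session.create() assumes a Coherence gRPC proxy on its default endpoint.
    session: Session = await Session.create()
    try:
        named_map: NamedMap[str, Document] = await session.get_map("my-map")
        # all-MiniLM-l6-v2 produces 384-dimensional vectors.
        embedding = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2")
        cvs = await CoherenceVectorStore.create(named_map, embedding, 384)
        await cvs.aadd_documents([
            Document(id="1", page_content="apple"),
            Document(id="2", page_content="orange"),
        ])
        for doc in await cvs.asimilarity_search("fruit", k=2):
            print(doc)
    finally:
        await session.close()


asyncio.run(main())
```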
libs/partners/coherence/coherence.iml (new file)
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
libs/partners/coherence/langchain_coherence/__init__.py (new file)
@@ -0,0 +1,5 @@
from __future__ import annotations

__version__ = "0.0.1"

from .coherence_store import CoherenceVectorStore
libs/partners/coherence/langchain_coherence/coherence_store.py (new file)
@@ -0,0 +1,534 @@
"""Coherence vector store."""

from __future__ import annotations

import asyncio
import json
import uuid
from typing import (
    TYPE_CHECKING,
    Any,
    Final,
    Optional,
    cast,
)

from typing_extensions import override

if TYPE_CHECKING:
    from collections.abc import Iterator, Sequence

import jsonpickle  # type: ignore[import-untyped]
from coherence import (  # type: ignore[import-untyped]
    Extractors,
    Filters,
    NamedCache,
)
from coherence.ai import (  # type: ignore[import-untyped]
    CosineDistance,
    DistanceAlgorithm,
    FloatVector,
    HnswIndex,
    QueryResult,
    SimilaritySearch,
    Vector,
    Vectors,
)
from coherence.extractor import (  # type: ignore[import-untyped]
    ValueExtractor,
)
from coherence.filter import (  # type: ignore[import-untyped]
    Filter,
)
from coherence.serialization import (  # type: ignore[import-untyped]
    JSONSerializer,
    SerializerRegistry,
)
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore


class CoherenceVectorStore(VectorStore):
    """Coherence VectorStore implementation.

    Uses a Coherence NamedCache for similarity search.

    Setup:
        Install ``langchain-core``.

        .. code-block:: bash

            pip install -U langchain-core

    Add Documents and retrieve them:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            session: Session = await Session.create()
            try:
                named_map: NamedMap[str, Document] = await session.get_map("my-map")
                # this embedding generates vectors of dimension 384
                embedding: Embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-l6-v2")
                cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                    named_map, embedding, 384)
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                documents = [d1, d2]
                await cvs.aadd_documents(documents)

                ids = [doc.id for doc in documents]
                docs = await cvs.aget_by_ids(ids)
                assert len(docs) == len(ids)
                print("====")
                for e in docs:
                    print(e)
            finally:
                await session.close()

    Delete Documents:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            session: Session = await Session.create()
            try:
                named_map: NamedMap[str, Document] = await session.get_map("my-map")
                # this embedding generates vectors of dimension 384
                embedding: Embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-l6-v2")
                cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                    named_map, embedding, 384)
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                documents = [d1, d2]
                await cvs.aadd_documents(documents)

                ids = [doc.id for doc in documents]
                await cvs.adelete(ids)
            finally:
                await session.close()

    Similarity Search:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            def test_data():
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                d3: Document = Document(id="3", page_content="tiger")
                d4: Document = Document(id="4", page_content="cat")
                d5: Document = Document(id="5", page_content="dog")
                d6: Document = Document(id="6", page_content="fox")
                d7: Document = Document(id="7", page_content="pear")
                d8: Document = Document(id="8", page_content="banana")
                d9: Document = Document(id="9", page_content="plum")
                d10: Document = Document(id="10", page_content="lion")

                documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
                return documents

            async def test_asimilarity_search():
                documents = test_data()
                session: Session = await Session.create()
                try:
                    named_map: NamedMap[str, Document] = await session.get_map(
                        "my-map")
                    # this embedding generates vectors of dimension 384
                    embedding: Embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-l6-v2")
                    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                        named_map, embedding, 384)
                    await cvs.aadd_documents(documents)
                    ids = [doc.id for doc in documents]
                    docs = await cvs.aget_by_ids(ids)
                    assert len(docs) == 10

                    result = await cvs.asimilarity_search("fruit")
                    assert len(result) == 4
                    print("====")
                    for e in result:
                        print(e)
                finally:
                    await session.close()

    Similarity Search by vector:
        .. code-block:: python

            # imports and test_data() as in the Similarity Search example above

            async def test_asimilarity_search_by_vector():
                documents = test_data()
                session: Session = await Session.create()
                try:
                    named_map: NamedMap[str, Document] = await session.get_map(
                        "my-map")
                    # this embedding generates vectors of dimension 384
                    embedding: Embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-l6-v2")
                    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                        named_map, embedding, 384)
                    await cvs.aadd_documents(documents)
                    ids = [doc.id for doc in documents]
                    docs = await cvs.aget_by_ids(ids)
                    assert len(docs) == 10

                    vector = cvs.embeddings.embed_query("fruit")
                    result = await cvs.asimilarity_search_by_vector(vector)
                    assert len(result) == 4
                    print("====")
                    for e in result:
                        print(e)
                finally:
                    await session.close()
    """

    VECTOR_FIELD: Final[str] = "__dict__.metadata.vector"
    """The name of the field containing the vector embeddings."""

    VECTOR_EXTRACTOR: Final[ValueExtractor] = Extractors.extract(VECTOR_FIELD)
    """The ValueExtractor to extract the embeddings vector."""

    def __init__(self, coherence_cache: NamedCache, embedding: Embeddings):
        """Initialize with a Coherence cache and an embedding function.

        Args:
            coherence_cache: Coherence NamedCache to use.
            embedding: embedding function to use.
        """
        self.cache = coherence_cache
        self.embedding = embedding

    @staticmethod
    async def create(coherence_cache: NamedCache, embedding: Embeddings,
                     dimensions: int
                     ) -> CoherenceVectorStore:
        """Create an instance of CoherenceVectorStore.

        Args:
            coherence_cache: Coherence NamedCache to use.
            embedding: embedding function to use.
            dimensions: size of the vectors created by the embedding function.
        """
        coh_store: CoherenceVectorStore = CoherenceVectorStore(coherence_cache,
                                                               embedding)
        await coherence_cache.add_index(HnswIndex(
            CoherenceVectorStore.VECTOR_EXTRACTOR, dimensions))
        return coh_store

    @property
    @override
    def embeddings(self) -> Embeddings:
        return self.embedding

    @override
    def add_documents(
        self, documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any
    ) -> list[str]:
        raise NotImplementedError

    @override
    async def aadd_documents(
        self, documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any
    ) -> list[str]:
        """Add documents to the store."""
        texts = [doc.page_content for doc in documents]
        vectors = await self.embedding.aembed_documents(texts)

        # Apply normalization and wrap in FloatVector
        float_vectors = [FloatVector(Vectors.normalize(vector)) for vector in vectors]

        if ids and len(ids) != len(texts):
            msg = (
                f"ids must be the same length as texts. "
                f"Got {len(ids)} ids and {len(texts)} texts."
            )
            raise ValueError(msg)

        id_iterator: Iterator[Optional[str]] = (
            iter(ids) if ids else iter(doc.id for doc in documents)
        )
        ids_: list[str] = []

        doc_map: dict[str, Document] = {}
        for doc, vector in zip(documents, float_vectors):
            doc_id = next(id_iterator)
            doc_id_ = doc_id or str(uuid.uuid4())
            ids_.append(doc_id_)
            doc.metadata["vector"] = vector
            doc_map[doc_id_] = doc

        await self.cache.put_all(doc_map)

        return ids_

    @override
    def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
        raise NotImplementedError

    @override
    async def aget_by_ids(self, ids: Sequence[str], /) -> list[Document]:
        """Get documents by their ids.

        Args:
            ids: The ids of the documents to get.

        Returns:
            A list of Document objects.
        """
        return [e.value async for e in await self.cache.get_all(set(ids))]

    @override
    async def adelete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None:
        """Async delete by Document ID or other criteria.

        Args:
            ids: List of ids to delete. If None, delete all. Default is None.
            **kwargs: Other keyword arguments that subclasses might use.
        """
        if ids is None:
            await self.cache.clear()
        else:
            # Efficient parallel delete
            await asyncio.gather(*(self.cache.remove(i) for i in ids))

    def _parse_coherence_kwargs(self, **kwargs: Any
                                ) -> tuple[DistanceAlgorithm, Filter, bool]:
        allowed_keys = {"algorithm", "filter", "brute_force"}
        extra_keys = set(kwargs) - allowed_keys
        if extra_keys:
            # Silently ignore unknown keys (or log if needed)
            for key in extra_keys:
                kwargs.pop(key)

        algorithm: DistanceAlgorithm = kwargs.get("algorithm", CosineDistance())
        filter_: Filter = kwargs.get("filter", Filters.always())
        brute_force: bool = kwargs.get("brute_force", False)

        return (algorithm, filter_, brute_force)

    @override
    async def asimilarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[Document]:
        """Asynchronously return docs most similar to query.

        Args:
            query: Input text.
            k: Number of Documents to return. Defaults to 4.
            **kwargs: Optional arguments:

                - algorithm: DistanceAlgorithm to use (default CosineDistance).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#cosinedistance
                - filter: filter to use to limit the set of entries to search
                  (default Filters.always()).
                  https://oracle.github.io/coherence-py-client/api_reference/filter.html
                - brute_force: force brute-force search, ignoring any available
                  indexes (default False).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#similaritysearch

        Returns:
            List of Documents most similar to the query.
        """
        algorithm, filter_, brute_force = self._parse_coherence_kwargs(**kwargs)

        query_vector = self.embedding.embed_query(query)
        float_query_vector = FloatVector(Vectors.normalize(query_vector))

        search: SimilaritySearch = SimilaritySearch(
            CoherenceVectorStore.VECTOR_EXTRACTOR,
            float_query_vector,
            k,
            algorithm=algorithm,
            filter=filter_,
            brute_force=brute_force,
        )
        query_results = await self.cache.aggregate(search)

        return [e.value for e in query_results]

    @override
    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[Document]:
        raise NotImplementedError

    @override
    async def asimilarity_search_by_vector(
        self, embedding: list[float], k: int = 4, **kwargs: Any
    ) -> list[Document]:
        """Asynchronously return docs most similar to the passed embedding vector.

        Args:
            embedding: Input vector.
            k: Number of Documents to return. Defaults to 4.
            **kwargs: Optional arguments:

                - algorithm: DistanceAlgorithm to use (default CosineDistance).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#cosinedistance
                - filter: filter to use to limit the set of entries to search
                  (default Filters.always()).
                  https://oracle.github.io/coherence-py-client/api_reference/filter.html
                - brute_force: force brute-force search, ignoring any available
                  indexes (default False).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#similaritysearch

        Returns:
            List of Documents most similar to the query.
        """
        algorithm, filter_, brute_force = self._parse_coherence_kwargs(**kwargs)
        float_query_vector = FloatVector(Vectors.normalize(embedding))

        search: SimilaritySearch = SimilaritySearch(
            CoherenceVectorStore.VECTOR_EXTRACTOR,
            float_query_vector,
            k,
            algorithm=algorithm,
            filter=filter_,
            brute_force=brute_force,
        )
        # The search filter is carried by the SimilaritySearch aggregator itself.
        query_results = await self.cache.aggregate(search)

        return [e.value for e in query_results]

    @override
    def similarity_search_by_vector(
        self, embedding: list[float], k: int = 4, **kwargs: Any
    ) -> list[Document]:
        raise NotImplementedError

    @override
    async def asimilarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[tuple[Document, float]]:
        """Asynchronously return a list of (Document, score) tuples most similar to query.

        Args:
            query: Input text.
            k: Number of Documents to return. Defaults to 4.
            **kwargs: Optional arguments:

                - algorithm: DistanceAlgorithm to use (default CosineDistance).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#cosinedistance
                - filter: filter to use to limit the set of entries to search
                  (default Filters.always()).
                  https://oracle.github.io/coherence-py-client/api_reference/filter.html
                - brute_force: force brute-force search, ignoring any available
                  indexes (default False).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#similaritysearch

        Returns:
            List of (Document, score) tuples most similar to the query.
        """
        algorithm, filter_, brute_force = self._parse_coherence_kwargs(**kwargs)
        query_vector = self.embedding.embed_query(query)
        float_query_vector = FloatVector(Vectors.normalize(query_vector))

        search: SimilaritySearch = SimilaritySearch(
            CoherenceVectorStore.VECTOR_EXTRACTOR,
            float_query_vector,
            k,
            algorithm=algorithm,
            filter=filter_,
            brute_force=brute_force,
        )
        query_results: list[QueryResult] = await self.cache.aggregate(search)

        return [(e.value, e.distance) for e in query_results]

    @override
    def similarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[tuple[Document, float]]:
        raise NotImplementedError

    @classmethod
    @override
    def from_texts(
        cls,
        texts: list[str],
        embedding: Embeddings,
        metadatas: Optional[list[dict]] = None,
        **kwargs: Any,
    ) -> CoherenceVectorStore:
        raise NotImplementedError

    @classmethod
    @override
    async def afrom_texts(
        cls,
        texts: list[str],
        embedding: Embeddings,
        metadatas: Optional[list[dict]] = None,
        **kwargs: Any,
    ) -> CoherenceVectorStore:
        raise NotImplementedError


@jsonpickle.handlers.register(Document)
class _LangChainDocumentHandler(jsonpickle.handlers.BaseHandler):  # type: ignore[misc]
    def flatten(self, obj: object, data: dict[str, Any]) -> dict[str, Any]:
        """Flatten object to a dictionary for handler to use."""
        ser = SerializerRegistry.serializer(JSONSerializer.SER_FORMAT)
        json_ser = cast("JSONSerializer", ser)
        o = cast("Document", obj)
        vector = o.metadata.get("vector")  # absent until aadd_documents has run
        if vector is not None and isinstance(vector, Vector):
            s = json_ser.serialize(vector)
            # Drop the leading serializer format byte before parsing the JSON payload.
            d = json.loads(s[1:])
            o.metadata["vector"] = json_ser.flatten_to_dict(d)

        data["__dict__"] = obj.__dict__
        return data

    def restore(self, obj: dict[str, Any]) -> Document:
        """Convert dictionary to an object for handler to use."""
        ser = SerializerRegistry.serializer(JSONSerializer.SER_FORMAT)
        json_ser = cast("JSONSerializer", ser)
        d = Document(page_content="")
        d.__dict__ = obj["__dict__"]
        vector = d.metadata.get("vector")
        if vector is not None and isinstance(vector, dict):
            d.metadata["vector"] = json_ser.restore_to_object(vector)
        return d
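All three async search methods route their keyword arguments through `_parse_coherence_kwargs`, so `algorithm`, `filter`, and `brute_force` behave identically across them. A hedged sketch making the defaults explicit (run inside an async context, with `cvs` built as in the README sketch above):

```python
from coherence import Filters
from coherence.ai import CosineDistance

# Every value shown is the default from _parse_coherence_kwargs;
# any other keyword argument is silently dropped.
results = await cvs.asimilarity_search(
    "fruit",
    k=4,
    algorithm=CosineDistance(),  # any coherence.ai DistanceAlgorithm
    filter=Filters.always(),     # narrow the candidate entries here
    brute_force=False,           # True bypasses the HNSW index added by create()
)

# The *_with_score variant returns (Document, distance) pairs instead.
for doc, distance in await cvs.asimilarity_search_with_score("fruit", k=4):
    print(doc.page_content, distance)
```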
libs/partners/coherence/pyproject.toml (new file)
@@ -0,0 +1,72 @@
[project]
name = "langchain-coherence"
version = "0.0.1"
description = "LangChain integration for Oracle Coherence as a vector store."
authors = [{ name = "Your Name", email = "you@example.com" }]
license = {text = "MIT"}
readme = "README.md"
dependencies = [
    "langchain-core>=0.1.20",
    "coherence-client>=2.0.3"
]
requires-python = ">=3.9"

[project.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/coherence"
repository = "https://github.com/langchain-ai/langchain"

[project.optional-dependencies]
lint = [
    "ruff<0.12.0,>=0.11.2",
]
typing = [
    "mypy<1.16,>=1.15",
]
test = [
    "pytest<9,>=8",
    "pytest-asyncio<1.0.0,>=0.21.1",
    "langchain_huggingface",
    "sentence_transformers"
]

[tool.mypy]
strict = true

[tool.ruff]
target-version = "py39"

[tool.ruff.lint]
select = ["ALL"]
ignore = [
    "C90",      # McCabe complexity
    "COM812",   # Messes with the formatter
    "FA100",    # Can't activate since we exclude UP007 for now
    "FIX002",   # Line contains TODO
    "ISC001",   # Messes with the formatter
    "PERF203",  # Rarely useful
    "PLR09",    # Too many something (args, statements, etc.)
    "RUF012",   # Doesn't play well with Pydantic
    "TC001",    # Doesn't play well with Pydantic
    "TC002",    # Doesn't play well with Pydantic
    "TC003",    # Doesn't play well with Pydantic
    "TD002",    # Missing author in TODO
    "TD003",    # Missing issue link in TODO
    "UP007",    # Doesn't play well with Pydantic in Python 3.9

    # TODO rules
    "ANN401",
    "BLE",
    "ERA",
    "PLR2004",
]
flake8-annotations.allow-star-arg-any = true
flake8-annotations.mypy-init-return = true
pydocstyle.convention = "google"
pydocstyle.ignore-var-parameters = true

[tool.pytest.ini_options]
asyncio_default_fixture_loop_scope = "function"
testpaths = ["tests"]
filterwarnings = [
    "ignore::UserWarning:pkg_resources"
]
libs/partners/coherence/tests/__init__.py (new, empty file)
libs/partners/coherence/tests/test_coherence_store.py (new file)
@@ -0,0 +1,125 @@
import inspect
from typing import AsyncGenerator

import pytest
import pytest_asyncio
from coherence import NamedCache, Session
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

from langchain_coherence import CoherenceVectorStore


@pytest_asyncio.fixture
async def store() -> AsyncGenerator[CoherenceVectorStore, None]:
    session: Session = await Session.create()
    named_cache: NamedCache[str, Document] = await session.get_cache("my-map")
    embedding: Embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-l6-v2")
    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
        named_cache, embedding, 384)
    yield cvs
    # await cvs.cache.remove_index(CoherenceVectorStore.VECTOR_EXTRACTOR)
    await cvs.cache.destroy()
    await session.close()


def get_test_data():
    d1: Document = Document(id="1", page_content="apple")
    d2: Document = Document(id="2", page_content="orange")
    d3: Document = Document(id="3", page_content="tiger")
    d4: Document = Document(id="4", page_content="cat")
    d5: Document = Document(id="5", page_content="dog")
    d6: Document = Document(id="6", page_content="fox")
    d7: Document = Document(id="7", page_content="pear")
    d8: Document = Document(id="8", page_content="banana")
    d9: Document = Document(id="9", page_content="plum")
    d10: Document = Document(id="10", page_content="lion")

    documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
    return documents


@pytest.mark.asyncio
async def test_coherence_store(store: CoherenceVectorStore):
    await run_test_aget_by_id(store)
    await run_test_adelete(store)
    await run_test_asimilarity_search(store)
    await run_test_asimilarity_search_by_vector(store)
    await run_test_asimilarity_search_with_score(store)


async def run_test_aget_by_id(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10
    print("====")
    for e in docs:
        print(e)


async def run_test_adelete(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10
    await store.adelete(["1", "2"])
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 8
    await store.adelete()
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 0


async def run_test_asimilarity_search(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10

    # result = await store.asimilarity_search("animal")
    result = await store.asimilarity_search("fruit")
    assert len(result) == 4
    print("====")
    for e in result:
        print(e)


async def run_test_asimilarity_search_by_vector(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10

    vector = store.embeddings.embed_query("animal")
    result = await store.asimilarity_search_by_vector(vector)
    assert len(result) == 4
    print("====")
    for e in result:
        print(e)


async def run_test_asimilarity_search_with_score(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10

    result = await store.asimilarity_search_with_score("fruit")
    assert len(result) == 4
    print("====")
    for e in result:
        print(e)
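The suite never exercises the explicit-`ids` path of `aadd_documents`. A hedged companion sketch in the same style (the helper name is mine, not part of the commit; it reuses the `store` fixture and the `pytest`/`Document` imports from the module above):

```python
async def run_test_aadd_documents_with_ids(store: CoherenceVectorStore):
    # Explicit ids take precedence over the Document.id values.
    docs = [Document(page_content="apple"), Document(page_content="orange")]
    ids = await store.aadd_documents(docs, ids=["a", "b"])
    assert ids == ["a", "b"]
    assert len(await store.aget_by_ids(ids)) == 2

    # A length mismatch between ids and documents raises ValueError.
    with pytest.raises(ValueError, match="must be the same length"):
        await store.aadd_documents(docs, ids=["only-one"])
```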
libs/partners/coherence/uv.lock (new file, 2319 lines)
File diff suppressed because it is too large.