mirror of https://github.com/hwchase17/langchain.git
synced 2025-08-09 04:50:37 +00:00
Initial Commit
This commit is contained in:
parent
990a69d9d7
commit
841149bffa
33  libs/partners/coherence/.gitignore  vendored  Normal file
@@ -0,0 +1,33 @@
# Python
__pycache__/
*.py[cod]
*.egg
*.egg-info/
dist/
build/
.eggs/

# Virtual environments
.venv/
.env/

# uv cache
.uv/

# Testing
htmlcov/
.cache/
.coverage
coverage.xml

# IDE
.idea/
.vscode/

# Logs
*.log

# OS
.DS_Store
Thumbs.db
19  libs/partners/coherence/Makefile  Normal file
@@ -0,0 +1,19 @@
.PHONY: install lint format fix test clean

install:
	uv pip install -e .[lint,typing,test]

lint:
	ruff check langchain_coherence tests

format:
	ruff format langchain_coherence tests

fix: format lint

test:
	pytest

clean:
	rm -rf .pytest_cache .mypy_cache .ruff_cache .uv __pycache__ *.egg-info build dist
15  libs/partners/coherence/README.md  Normal file
@@ -0,0 +1,15 @@
# LangChain Coherence Integration

This package integrates Oracle Coherence as a vector store for LangChain.

## Installation

```bash
pip install langchain-coherence
```

## Usage

```python
from langchain_coherence import CoherenceVectorStore
```
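For a fuller end-to-end example, the sketch below (adapted from the class docstring; it assumes a reachable Coherence server, the optional `langchain_huggingface` package, and uses the illustrative cache name `my-map`) adds a few documents and runs a similarity search:

```python
import asyncio

from coherence import NamedCache, Session
from langchain_core.documents import Document
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

from langchain_coherence import CoherenceVectorStore


async def main() -> None:
    # Session.create() connects to the default Coherence gRPC proxy address
    session = await Session.create()
    try:
        cache: NamedCache[str, Document] = await session.get_cache("my-map")
        # all-MiniLM-l6-v2 produces 384-dimensional vectors
        embedding = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2")
        store = await CoherenceVectorStore.create(cache, embedding, 384)

        await store.aadd_documents([
            Document(id="1", page_content="apple"),
            Document(id="2", page_content="orange"),
        ])
        for doc in await store.asimilarity_search("fruit", k=2):
            print(doc.page_content)
    finally:
        await session.close()


asyncio.run(main())
```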
11  libs/partners/coherence/coherence.iml  Normal file
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
5  libs/partners/coherence/langchain_coherence/__init__.py  Normal file
@@ -0,0 +1,5 @@
from __future__ import annotations

__version__ = "0.0.1"

from .coherence_store import CoherenceVectorStore
534  libs/partners/coherence/langchain_coherence/coherence_store.py  Normal file
@@ -0,0 +1,534 @@
"""Coherence vector store."""

from __future__ import annotations

import asyncio
import json
import uuid
from typing import (
    TYPE_CHECKING,
    Any,
    Final,
    Optional,
    cast,
)

from typing_extensions import override

if TYPE_CHECKING:
    from collections.abc import Iterator, Sequence

import jsonpickle  # type: ignore[import-untyped]
from coherence import (  # type: ignore[import-untyped]
    Extractors,
    Filters,
    NamedCache,
)
from coherence.ai import (  # type: ignore[import-untyped]
    CosineDistance,
    DistanceAlgorithm,
    FloatVector,
    HnswIndex,
    QueryResult,
    SimilaritySearch,
    Vector,
    Vectors,
)
from coherence.extractor import (  # type: ignore[import-untyped]
    ValueExtractor,
)
from coherence.filter import (  # type: ignore[import-untyped]
    Filter,
)
from coherence.serialization import (  # type: ignore[import-untyped]
    JSONSerializer,
    SerializerRegistry,
)

from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

class CoherenceVectorStore(VectorStore):
    """Coherence VectorStore implementation.

    Uses a Coherence NamedCache for similarity search.

    Setup:
        Install ``langchain-coherence``.

        .. code-block:: bash

            pip install -U langchain-coherence

    Add Documents and retrieve them:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            session: Session = await Session.create()
            try:
                named_map: NamedMap[str, Document] = await session.get_map("my-map")
                embedding: Embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-l6-v2")
                # this embedding generates vectors of dimension 384
                cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                    named_map, embedding, 384)
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                documents = [d1, d2]
                await cvs.aadd_documents(documents)

                ids = [doc.id for doc in documents]
                docs = await cvs.aget_by_ids(ids)
                assert len(docs) == len(ids)
                print("====")
                for doc in docs:
                    print(doc)
            finally:
                await session.close()

    Delete Documents:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            session: Session = await Session.create()
            try:
                named_map: NamedMap[str, Document] = await session.get_map("my-map")
                embedding: Embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-l6-v2")
                # this embedding generates vectors of dimension 384
                cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                    named_map, embedding, 384)
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                documents = [d1, d2]
                await cvs.aadd_documents(documents)

                ids = [doc.id for doc in documents]
                await cvs.adelete(ids)
            finally:
                await session.close()

    Similarity Search:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            def test_data():
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                d3: Document = Document(id="3", page_content="tiger")
                d4: Document = Document(id="4", page_content="cat")
                d5: Document = Document(id="5", page_content="dog")
                d6: Document = Document(id="6", page_content="fox")
                d7: Document = Document(id="7", page_content="pear")
                d8: Document = Document(id="8", page_content="banana")
                d9: Document = Document(id="9", page_content="plum")
                d10: Document = Document(id="10", page_content="lion")

                documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
                return documents

            async def test_asimilarity_search():
                documents = test_data()
                session: Session = await Session.create()
                try:
                    named_map: NamedMap[str, Document] = await session.get_map("my-map")
                    embedding: Embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-l6-v2")
                    # this embedding generates vectors of dimension 384
                    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                        named_map, embedding, 384)
                    await cvs.aadd_documents(documents)
                    ids = [doc.id for doc in documents]
                    docs = await cvs.aget_by_ids(ids)
                    assert len(docs) == 10

                    result = await cvs.asimilarity_search("fruit")
                    assert len(result) == 4
                    print("====")
                    for doc in result:
                        print(doc)
                finally:
                    await session.close()

    Similarity Search by vector:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            def test_data():
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                d3: Document = Document(id="3", page_content="tiger")
                d4: Document = Document(id="4", page_content="cat")
                d5: Document = Document(id="5", page_content="dog")
                d6: Document = Document(id="6", page_content="fox")
                d7: Document = Document(id="7", page_content="pear")
                d8: Document = Document(id="8", page_content="banana")
                d9: Document = Document(id="9", page_content="plum")
                d10: Document = Document(id="10", page_content="lion")

                documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
                return documents

            async def test_asimilarity_search_by_vector():
                documents = test_data()
                session: Session = await Session.create()
                try:
                    named_map: NamedMap[str, Document] = await session.get_map("my-map")
                    embedding: Embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-l6-v2")
                    # this embedding generates vectors of dimension 384
                    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                        named_map, embedding, 384)
                    await cvs.aadd_documents(documents)
                    ids = [doc.id for doc in documents]
                    docs = await cvs.aget_by_ids(ids)
                    assert len(docs) == 10

                    vector = cvs.embeddings.embed_query("fruit")
                    result = await cvs.asimilarity_search_by_vector(vector)
                    assert len(result) == 4
                    print("====")
                    for doc in result:
                        print(doc)
                finally:
                    await session.close()
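
    Similarity Search with score (a short sketch in the same vein as the
    examples above; it assumes ``cvs`` was created and populated as shown
    there, and returns ``(Document, distance)`` tuples):
        .. code-block:: python

            results = await cvs.asimilarity_search_with_score("fruit", k=4)
            for doc, score in results:
                print(doc.page_content, score)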

    """

    VECTOR_FIELD: Final[str] = "__dict__.metadata.vector"
    """The name of the field containing the vector embeddings."""

    VECTOR_EXTRACTOR: Final[ValueExtractor] = Extractors.extract(VECTOR_FIELD)
    """The ValueExtractor used to extract the embeddings vector."""

    def __init__(self, coherence_cache: NamedCache, embedding: Embeddings):
        """Initialize with a Coherence cache and an embedding function.

        Args:
            coherence_cache: Coherence NamedCache to use.
            embedding: embedding function to use.
        """
        self.cache = coherence_cache
        self.embedding = embedding

    @staticmethod
    async def create(coherence_cache: NamedCache, embedding: Embeddings,
                     dimensions: int
                     ) -> CoherenceVectorStore:
        """Create an instance of CoherenceVectorStore.

        Args:
            coherence_cache: Coherence NamedCache to use.
            embedding: embedding function to use.
            dimensions: size of the vectors created by the embedding function.
        """
        coh_store: CoherenceVectorStore = CoherenceVectorStore(coherence_cache,
                                                               embedding)
        await coherence_cache.add_index(HnswIndex(
            CoherenceVectorStore.VECTOR_EXTRACTOR, dimensions))
        return coh_store

    @property
    @override
    def embeddings(self) -> Embeddings:
        return self.embedding

    @override
    def add_documents(
        self, documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any
    ) -> list[str]:
        raise NotImplementedError

    @override
    async def aadd_documents(
        self, documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any
    ) -> list[str]:
        """Add documents to the store."""
        texts = [doc.page_content for doc in documents]
        vectors = await self.embedding.aembed_documents(texts)

        # Apply normalization and wrap in FloatVector
        float_vectors = [FloatVector(Vectors.normalize(vector)) for vector in vectors]

        if ids and len(ids) != len(texts):
            msg = (
                f"ids must be the same length as texts. "
                f"Got {len(ids)} ids and {len(texts)} texts."
            )
            raise ValueError(msg)

        id_iterator: Iterator[Optional[str]] = (
            iter(ids) if ids else iter(doc.id for doc in documents)
        )
        ids_: list[str] = []

        doc_map: dict[str, Document] = {}
        for doc, vector in zip(documents, float_vectors):
            doc_id = next(id_iterator)
            doc_id_ = doc_id or str(uuid.uuid4())
            ids_.append(doc_id_)
            # Store the embedding on the document so the cache index can extract it
            doc.metadata["vector"] = vector
            doc_map[doc_id_] = doc

        await self.cache.put_all(doc_map)

        return ids_

    @override
    def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
        raise NotImplementedError

    @override
    async def aget_by_ids(self, ids: Sequence[str], /) -> list[Document]:
        """Get documents by their ids.

        Args:
            ids: The ids of the documents to get.

        Returns:
            A list of Document objects.
        """
        return [e.value async for e in await self.cache.get_all(set(ids))]

    @override
    async def adelete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None:
        """Async delete by Document ID or other criteria.

        Args:
            ids: List of ids to delete. If None, delete all. Default is None.
            **kwargs: Other keyword arguments that subclasses might use.
        """
        if ids is None:
            await self.cache.clear()
        else:
            # Efficient parallel delete
            await asyncio.gather(*(self.cache.remove(i) for i in ids))

    def _parse_coherence_kwargs(self, **kwargs: Any
                                ) -> tuple[DistanceAlgorithm, Filter, bool]:
        allowed_keys = {"algorithm", "filter", "brute_force"}
        extra_keys = set(kwargs) - allowed_keys
        if extra_keys:
            # Silently drop unrecognized keyword arguments
            for key in extra_keys:
                kwargs.pop(key)

        algorithm: DistanceAlgorithm = kwargs.get("algorithm", CosineDistance())
        filter_: Filter = kwargs.get("filter", Filters.always())
        brute_force: bool = kwargs.get("brute_force", False)

        return (algorithm, filter_, brute_force)

    @override
    async def asimilarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[Document]:
        """Async method to return docs most similar to query.

        Args:
            query: Input text.
            k: Number of Documents to return. Defaults to 4.
            **kwargs: Optional arguments:

                - algorithm: DistanceAlgorithm to use (default CosineDistance).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#cosinedistance
                - filter: filter used to limit the set of entries to search
                  (default Filters.always()).
                  https://oracle.github.io/coherence-py-client/api_reference/filter.html
                - brute_force: force brute-force search, ignoring any available indexes
                  (default False).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#similaritysearch

        Returns:
            List of Documents most similar to the query.
        """
        algorithm, filter_, brute_force = self._parse_coherence_kwargs(**kwargs)

        query_vector = self.embedding.embed_query(query)
        float_query_vector = FloatVector(Vectors.normalize(query_vector))

        search: SimilaritySearch = SimilaritySearch(
            CoherenceVectorStore.VECTOR_EXTRACTOR,
            float_query_vector,
            k,
            algorithm=algorithm,
            filter=filter_,
            brute_force=brute_force,
        )
        query_results = await self.cache.aggregate(search)

        return [e.value for e in query_results]

    @override
    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[Document]:
        raise NotImplementedError

    @override
    async def asimilarity_search_by_vector(
        self, embedding: list[float], k: int = 4, **kwargs: Any
    ) -> list[Document]:
        """Async method to return docs most similar to the passed embedding vector.

        Args:
            embedding: Input vector.
            k: Number of Documents to return. Defaults to 4.
            **kwargs: Optional arguments:

                - algorithm: DistanceAlgorithm to use (default CosineDistance).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#cosinedistance
                - filter: filter used to limit the set of entries to search
                  (default Filters.always()).
                  https://oracle.github.io/coherence-py-client/api_reference/filter.html
                - brute_force: force brute-force search, ignoring any available indexes
                  (default False).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#similaritysearch

        Returns:
            List of Documents most similar to the query vector.
        """
        algorithm, filter_, brute_force = self._parse_coherence_kwargs(**kwargs)
        float_query_vector = FloatVector(Vectors.normalize(embedding))

        search: SimilaritySearch = SimilaritySearch(
            CoherenceVectorStore.VECTOR_EXTRACTOR,
            float_query_vector,
            k,
            algorithm=algorithm,
            filter=filter_,
            brute_force=brute_force,
        )
        query_results = await self.cache.aggregate(search, filter=Filters.always())

        return [e.value for e in query_results]

    @override
    def similarity_search_by_vector(
        self, embedding: list[float], k: int = 4, **kwargs: Any
    ) -> list[Document]:
        raise NotImplementedError

    @override
    async def asimilarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[tuple[Document, float]]:
        """Async method to return a list of tuple(Document, score) most similar to query.

        Args:
            query: Input text.
            k: Number of Documents to return. Defaults to 4.
            **kwargs: Optional arguments:

                - algorithm: DistanceAlgorithm to use (default CosineDistance).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#cosinedistance
                - filter: filter used to limit the set of entries to search
                  (default Filters.always()).
                  https://oracle.github.io/coherence-py-client/api_reference/filter.html
                - brute_force: force brute-force search, ignoring any available indexes
                  (default False).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#similaritysearch

        Returns:
            List of tuple(Document, score) most similar to the query.
        """
        algorithm, filter_, brute_force = self._parse_coherence_kwargs(**kwargs)
        query_vector = self.embedding.embed_query(query)
        float_query_vector = FloatVector(Vectors.normalize(query_vector))

        search: SimilaritySearch = SimilaritySearch(
            CoherenceVectorStore.VECTOR_EXTRACTOR,
            float_query_vector,
            k,
            algorithm=algorithm,
            filter=filter_,
            brute_force=brute_force,
        )
        query_results: list[QueryResult] = await self.cache.aggregate(
            search, filter=Filters.always()
        )

        return [(e.value, e.distance) for e in query_results]

    @override
    def similarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[tuple[Document, float]]:
        raise NotImplementedError

    @classmethod
    @override
    def from_texts(
        cls,
        texts: list[str],
        embedding: Embeddings,
        metadatas: Optional[list[dict]] = None,
        **kwargs: Any,
    ) -> CoherenceVectorStore:
        raise NotImplementedError

    @classmethod
    @override
    async def afrom_texts(
        cls,
        texts: list[str],
        embedding: Embeddings,
        metadatas: Optional[list[dict]] = None,
        **kwargs: Any,
    ) -> CoherenceVectorStore:
        raise NotImplementedError


@jsonpickle.handlers.register(Document)
class _LangChainDocumentHandler(jsonpickle.handlers.BaseHandler):  # type: ignore[misc]
    def flatten(self, obj: object, data: dict[str, Any]) -> dict[str, Any]:
        """Flatten object to a dictionary for handler to use."""
        ser = SerializerRegistry.serializer(JSONSerializer.SER_FORMAT)
        json_ser = cast("JSONSerializer", ser)
        o = cast("Document", obj)
        vector = o.metadata["vector"]
        if vector is not None and isinstance(vector, Vector):
            s = json_ser.serialize(vector)
            d = json.loads(s[1:])
            o.metadata["vector"] = json_ser.flatten_to_dict(d)

        data["__dict__"] = obj.__dict__
        return data

    def restore(self, obj: dict[str, Any]) -> Document:
        """Convert dictionary to an object for handler to use."""
        ser = SerializerRegistry.serializer(JSONSerializer.SER_FORMAT)
        json_ser = cast("JSONSerializer", ser)
        d = Document(page_content="")
        d.__dict__ = obj["__dict__"]
        vector = d.metadata["vector"]
        if vector is not None and isinstance(vector, dict):
            d.metadata["vector"] = json_ser.restore_to_object(vector)
        return d
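The `_LangChainDocumentHandler` above hooks `Document` (de)serialization into jsonpickle so the `FloatVector` stored in `Document.metadata` survives the round trip through Coherence. A minimal sketch of the same jsonpickle handler mechanism, using an illustrative `Point` class rather than the real Coherence types:

```python
import jsonpickle
import jsonpickle.handlers


class Point:
    def __init__(self, x: float, y: float) -> None:
        self.x = x
        self.y = y


@jsonpickle.handlers.register(Point)
class PointHandler(jsonpickle.handlers.BaseHandler):
    def flatten(self, obj, data):
        # Write the fields we want to survive the round trip into the pickled dict
        data["xy"] = [obj.x, obj.y]
        return data

    def restore(self, obj):
        # Rebuild the object from the pickled dict
        return Point(*obj["xy"])


encoded = jsonpickle.encode(Point(1.0, 2.0))
restored = jsonpickle.decode(encoded)
assert (restored.x, restored.y) == (1.0, 2.0)
```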
72  libs/partners/coherence/pyproject.toml  Normal file
@@ -0,0 +1,72 @@
[project]
name = "langchain-coherence"
version = "0.0.1"
description = "LangChain integration for Oracle Coherence as a vector store."
authors = [{ name = "Your Name", email = "you@example.com" }]
license = {text = "MIT"}
readme = "README.md"
dependencies = [
    "langchain-core>=0.1.20",
    "coherence-client>=2.0.3"
]
requires-python = ">=3.9"

[project.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/coherence"
repository = "https://github.com/langchain-ai/langchain"

[project.optional-dependencies]
lint = [
    "ruff<0.12.0,>=0.11.2",
]
typing = [
    "mypy<1.16,>=1.15",
]
test = [
    "pytest<9,>=8",
    "pytest-asyncio<1.0.0,>=0.21.1",
    "langchain_huggingface",
    "sentence_transformers"
]

[tool.mypy]
strict = true

[tool.ruff]
target-version = "py39"

[tool.ruff.lint]
select = [ "ALL",]
ignore = [
    "C90",      # McCabe complexity
    "COM812",   # Messes with the formatter
    "FA100",    # Can't activate since we exclude UP007 for now
    "FIX002",   # Line contains TODO
    "ISC001",   # Messes with the formatter
    "PERF203",  # Rarely useful
    "PLR09",    # Too many something (arg, statements, etc)
    "RUF012",   # Doesn't play well with Pydantic
    "TC001",    # Doesn't play well with Pydantic
    "TC002",    # Doesn't play well with Pydantic
    "TC003",    # Doesn't play well with Pydantic
    "TD002",    # Missing author in TODO
    "TD003",    # Missing issue link in TODO
    "UP007",    # Doesn't play well with Pydantic in Python 3.9

    # TODO rules
    "ANN401",
    "BLE",
    "ERA",
    "PLR2004",
]
flake8-annotations.allow-star-arg-any = true
flake8-annotations.mypy-init-return = true
pydocstyle.convention = "google"
pydocstyle.ignore-var-parameters = true

[tool.pytest.ini_options]
asyncio_default_fixture_loop_scope = "function"
testpaths = ["tests"]
filterwarnings = [
    "ignore::UserWarning:pkg_resources"
]
0  libs/partners/coherence/tests/__init__.py  Normal file
125  libs/partners/coherence/tests/test_coherence_store.py  Normal file
@@ -0,0 +1,125 @@
import inspect
from typing import AsyncGenerator

import pytest
import pytest_asyncio
from coherence import NamedCache, Session
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

from langchain_coherence import CoherenceVectorStore


@pytest_asyncio.fixture
async def store() -> AsyncGenerator[CoherenceVectorStore, None]:
    session: Session = await Session.create()
    named_cache: NamedCache[str, Document] = await session.get_cache("my-map")
    embedding: Embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-l6-v2")
    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
        named_cache, embedding, 384)
    yield cvs
    # await cvs.cache.remove_index(CoherenceVectorStore.VECTOR_EXTRACTOR)
    await cvs.cache.destroy()
    await session.close()


def get_test_data():
    d1: Document = Document(id="1", page_content="apple")
    d2: Document = Document(id="2", page_content="orange")
    d3: Document = Document(id="3", page_content="tiger")
    d4: Document = Document(id="4", page_content="cat")
    d5: Document = Document(id="5", page_content="dog")
    d6: Document = Document(id="6", page_content="fox")
    d7: Document = Document(id="7", page_content="pear")
    d8: Document = Document(id="8", page_content="banana")
    d9: Document = Document(id="9", page_content="plum")
    d10: Document = Document(id="10", page_content="lion")

    documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
    return documents


@pytest.mark.asyncio
async def test_coherence_store(store: CoherenceVectorStore):
    await run_test_aget_by_id(store)
    await run_test_adelete(store)
    await run_test_asimilarity_search(store)
    await run_test_asimilarity_search_by_vector(store)
    await run_test_asimilarity_search_with_score(store)


async def run_test_aget_by_id(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10
    print("====")
    for e in docs:
        print(e)


async def run_test_adelete(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10
    await store.adelete(["1", "2"])
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 8
    await store.adelete()
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 0


async def run_test_asimilarity_search(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10

    # result = await store.asimilarity_search("animal")
    result = await store.asimilarity_search("fruit")
    assert len(result) == 4
    print("====")
    for e in result:
        print(e)


async def run_test_asimilarity_search_by_vector(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10

    vector = store.embeddings.embed_query("animal")
    result = await store.asimilarity_search_by_vector(vector)
    assert len(result) == 4
    print("====")
    for e in result:
        print(e)


async def run_test_asimilarity_search_with_score(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10

    result = await store.asimilarity_search_with_score("fruit")
    assert len(result) == 4
    print("====")
    for e in result:
        print(e)
2319  libs/partners/coherence/uv.lock  Normal file
File diff suppressed because it is too large