Added documentation, license

This commit is contained in:
dhirup 2025-07-25 13:27:37 -07:00
parent 841149bffa
commit fd0e46691f
13 changed files with 2157 additions and 215 deletions

View File

@ -27,6 +27,9 @@ coverage.xml
# Logs # Logs
*.log *.log
# OCA
.oca
# OS # OS
.DS_Store .DS_Store
Thumbs.db Thumbs.db

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,19 +1,68 @@
.PHONY: install lint format test clean # Makefile for LangChain-Coherence Integration
.PHONY: install update-dev update-core lock sync lint format fix check test clean docs-ipynb readme-ipynb create-ipynb help
# Paths to common tools (adjust if using .uv or other env)
PYTHON := .venv/bin/python
PIP := .venv/bin/pip
RUFF := .venv/bin/ruff
PYTEST := .venv/bin/pytest
MYPY := .venv/bin/mypy
install: install:
uv pip install -e .[lint,typing,test] @echo "🔧 Installing all dependencies..."
uv venv
uv pip install -e .[lint,typing,test,docs]
lint: update-dev:
ruff check langchain_coherence tests @echo "🔄 Updating development dependencies..."
uv pip install -e .[lint,typing,test,docs] --upgrade
update-core:
@echo "🔄 Updating core dependencies..."
uv pip install --upgrade langchain-core coherence-client
lock:
@echo "🔐 Locking all dependencies to uv.lock..."
uv pip compile --all-extras
sync:
@echo "📦 Syncing dependencies from uv.lock..."
uv pip install -r uv.lock
lint: check
check:
@echo "🔍 Running linter and type checker..."
$(RUFF) check langchain_coherence tests
$(MYPY) --explicit-package-bases langchain_coherence
format: format:
ruff format langchain_coherence tests @echo "🎨 Formatting code with Ruff..."
$(RUFF) format langchain_coherence tests
fix: format lint fix:
@echo "🔧 Fixing lint issues..."
$(MAKE) format
$(RUFF) check langchain_coherence tests --fix
test: test:
pytest @echo "🧪 Running tests..."
$(PYTEST)
clean: clean:
rm -rf .pytest_cache .mypy_cache .ruff_cache .uv __pycache__ *.egg-info build dist @echo "🧹 Cleaning build/test artifacts..."
rm -rf .pytest_cache .mypy_cache .ruff_cache .venv .uv __pycache__ *.egg-info dist build
help:
@echo "🛠 Available Make targets:"
@echo " install - Install all dependencies into .venv"
@echo " update-dev - Upgrade dev dependencies (ruff, pytest, etc.)"
@echo " update-core - Upgrade core runtime deps (langchain-core, coherence-client)"
@echo " lock - Generate uv.lock with pinned versions"
@echo " sync - Install from uv.lock (repeatable builds)"
@echo " lint - Run linter and mypy"
@echo " fix - Autoformat and fix issues"
@echo " test - Run all tests"
@echo " clean - Remove temp and build files"

View File

@ -10,6 +10,94 @@ pip install langchain_coherence
## Usage ## Usage
```python Before using LangChain's `CoherenceVectorStore`, you must ensure that a Coherence server ([Coherence CE](https://github.com/oracle/coherence) 25.03+ or [Oracle Coherence](https://www.oracle.com/java/coherence/) 14.1.2+) is running.
from langchain_coherence import CoherenceVectorStore
For local development, we recommend using the Coherence CE container image:
```bash
docker run -d -p 1408:1408 ghcr.io/oracle/coherence-ce:25.03.2
```
### Adding and retrieving Documents
```python
import asyncio

from coherence import NamedMap, Session
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

from langchain_coherence import CoherenceVectorStore


async def do_run() -> None:
    """Store two documents in Coherence and read them back by id."""
    session: Session = await Session.create()
    try:
        named_map: NamedMap[str, Document] = await session.get_map("my-map")
        embedding: Embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2"
        )
        # `create` takes only the map and the embedding function; it no
        # longer accepts the vector dimension.
        cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
            named_map, embedding
        )
        # Optional: this embedding generates vectors of dimension 384, so an
        # HNSW index can be added with `await cvs.add_index(384)`.
        documents = [
            Document(id="1", page_content="apple"),
            Document(id="2", page_content="orange"),
        ]
        await cvs.aadd_documents(documents)
        ids = [doc.id for doc in documents]
        found = await cvs.aget_by_ids(ids)
        assert len(found) == len(ids)
        print("====")
        for doc in found:
            print(doc)
    finally:
        # Always release the session, even if a step above fails.
        await session.close()


asyncio.run(do_run())
```
### Similarity search on Documents
```python
from coherence import NamedMap, Session
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# The store lives in the `langchain_coherence` package, not in langchain_core.
from langchain_coherence import CoherenceVectorStore


def test_data() -> list[Document]:
    """Return ten single-word documents with ids "1" through "10"."""
    words = [
        "apple",
        "orange",
        "tiger",
        "cat",
        "dog",
        "fox",
        "pear",
        "banana",
        "plum",
        "lion",
    ]
    return [
        Document(id=str(number), page_content=word)
        for number, word in enumerate(words, start=1)
    ]


async def test_asimilarity_search() -> None:
    """Add the sample documents and run a similarity search for "fruit"."""
    documents = test_data()
    session: Session = await Session.create()
    try:
        named_map: NamedMap[str, Document] = await session.get_map("my-map")
        embedding: Embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2"
        )
        # `create` takes only the map and the embedding function; it no
        # longer accepts the vector dimension.
        cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
            named_map, embedding
        )
        # Optional: this embedding generates vectors of dimension 384, so an
        # HNSW index can be added with `await cvs.add_index(384)`.
        await cvs.aadd_documents(documents)
        ids = [doc.id for doc in documents]
        stored = await cvs.aget_by_ids(ids)
        assert len(stored) == 10
        result = await cvs.asimilarity_search("fruit")
        assert len(result) == 4
        print("====")
        for doc in result:
            print(doc)
    finally:
        # Always release the session, even if a step above fails.
        await session.close()
``` ```

View File

@ -1,11 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -1,5 +1,9 @@
"""Public interface for the LangChain Coherence integration."""
from __future__ import annotations from __future__ import annotations
__version__ = "0.0.1" __version__ = "0.0.1"
from .coherence_store import CoherenceVectorStore from langchain_coherence.coherence_store import CoherenceVectorStore
__all__ = ["CoherenceVectorStore"]

View File

@ -44,7 +44,6 @@ from coherence.serialization import ( # type: ignore[import-untyped]
JSONSerializer, JSONSerializer,
SerializerRegistry, SerializerRegistry,
) )
from langchain_core.documents import Document from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore from langchain_core.vectorstores import VectorStore
@ -79,7 +78,7 @@ class CoherenceVectorStore(VectorStore):
model_name="sentence-transformers/all-MiniLM-l6-v2") model_name="sentence-transformers/all-MiniLM-l6-v2")
# this embedding generates vectors of dimension 384 # this embedding generates vectors of dimension 384
cvs :CoherenceVectorStore = await CoherenceVectorStore.create( cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
named_map,embedding,384) named_map,embedding)
d1 :Document = Document(id="1", page_content="apple") d1 :Document = Document(id="1", page_content="apple")
d2 :Document = Document(id="2", page_content="orange") d2 :Document = Document(id="2", page_content="orange")
documents = [d1, d2] documents = [d1, d2]
@ -111,7 +110,7 @@ class CoherenceVectorStore(VectorStore):
model_name="sentence-transformers/all-MiniLM-l6-v2") model_name="sentence-transformers/all-MiniLM-l6-v2")
# this embedding generates vectors of dimension 384 # this embedding generates vectors of dimension 384
cvs :CoherenceVectorStore = await CoherenceVectorStore.create( cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
named_map,embedding,384) named_map,embedding)
d1 :Document = Document(id="1", page_content="apple") d1 :Document = Document(id="1", page_content="apple")
d2 :Document = Document(id="2", page_content="orange") d2 :Document = Document(id="2", page_content="orange")
documents = [d1, d2] documents = [d1, d2]
@ -156,7 +155,7 @@ class CoherenceVectorStore(VectorStore):
model_name="sentence-transformers/all-MiniLM-l6-v2") model_name="sentence-transformers/all-MiniLM-l6-v2")
# this embedding generates vectors of dimension 384 # this embedding generates vectors of dimension 384
cvs :CoherenceVectorStore = await CoherenceVectorStore.create( cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
named_map,embedding,384) named_map,embedding)
await cvs.aadd_documents(documents) await cvs.aadd_documents(documents)
ids = [doc.id for doc in documents] ids = [doc.id for doc in documents]
l = await cvs.aget_by_ids(ids) l = await cvs.aget_by_ids(ids)
@ -204,7 +203,7 @@ class CoherenceVectorStore(VectorStore):
model_name="sentence-transformers/all-MiniLM-l6-v2") model_name="sentence-transformers/all-MiniLM-l6-v2")
# this embedding generates vectors of dimension 384 # this embedding generates vectors of dimension 384
cvs :CoherenceVectorStore = await CoherenceVectorStore.create( cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
named_map,embedding,384) named_map,embedding)
await cvs.aadd_documents(documents) await cvs.aadd_documents(documents)
ids = [doc.id for doc in documents] ids = [doc.id for doc in documents]
l = await cvs.aget_by_ids(ids) l = await cvs.aget_by_ids(ids)
@ -238,21 +237,34 @@ class CoherenceVectorStore(VectorStore):
self.embedding = embedding self.embedding = embedding
@staticmethod @staticmethod
async def create(coherence_cache: NamedCache, embedding: Embeddings, async def create(
dimensions: int coherence_cache: NamedCache,
embedding: Embeddings,
) -> CoherenceVectorStore: ) -> CoherenceVectorStore:
"""Create an instance of CoherenceVectorStore. """Create an instance of CoherenceVectorStore.
Args: Args:
coherence_cache: Coherence NamedCache to use coherence_cache: Coherence NamedCache to use
embedding: embedding function to use. embedding: embedding function to use.
"""
coh_store: CoherenceVectorStore = CoherenceVectorStore(
coherence_cache, embedding
)
return coh_store
async def add_index(self, dimensions: int) -> None:
"""Creates index on the Coherence cache on the VECTOR_FIELD.
Args:
dimensions: size of the vector created by the embedding function dimensions: size of the vector created by the embedding function
""" """
coh_store: CoherenceVectorStore = CoherenceVectorStore(coherence_cache, await self.cache.add_index(
embedding) HnswIndex(CoherenceVectorStore.VECTOR_EXTRACTOR, dimensions)
await coherence_cache.add_index(HnswIndex( )
CoherenceVectorStore.VECTOR_EXTRACTOR, dimensions))
return coh_store async def remove_index(self) -> None:
"""Removes index on the Coherence cache on the VECTOR_FIELD."""
await self.cache.remove_index(CoherenceVectorStore.VECTOR_EXTRACTOR)
@property @property
@override @override
@ -269,7 +281,19 @@ class CoherenceVectorStore(VectorStore):
async def aadd_documents( async def aadd_documents(
self, documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any self, documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any
) -> list[str]: ) -> list[str]:
"""Add documents to the store.""" """Async run more documents through the embeddings and add to the vectorstore.
Args:
documents: Documents to add to the vectorstore.
ids: Optional list of IDs of the documents.
kwargs: Additional keyword arguments.
Returns:
List of IDs of the added texts.
Raises:
ValueError: If the number of IDs does not match the number of documents.
"""
texts = [doc.page_content for doc in documents] texts = [doc.page_content for doc in documents]
vectors = await self.embedding.aembed_documents(texts) vectors = await self.embedding.aembed_documents(texts)
@ -334,7 +358,8 @@ class CoherenceVectorStore(VectorStore):
# Efficient parallel delete # Efficient parallel delete
await asyncio.gather(*(self.cache.remove(i) for i in ids)) await asyncio.gather(*(self.cache.remove(i) for i in ids))
def _parse_coherence_kwargs(self, **kwargs: Any def _parse_coherence_kwargs(
self, **kwargs: Any
) -> tuple[DistanceAlgorithm, Filter, bool]: ) -> tuple[DistanceAlgorithm, Filter, bool]:
allowed_keys = {"algorithm", "filter", "brute_force"} allowed_keys = {"algorithm", "filter", "brute_force"}
extra_keys = set(kwargs) - allowed_keys extra_keys = set(kwargs) - allowed_keys
@ -489,10 +514,11 @@ class CoherenceVectorStore(VectorStore):
cls, cls,
texts: list[str], texts: list[str],
embedding: Embeddings, embedding: Embeddings,
metadatas: Optional[list[dict]] = None, metadatas: Optional[list[dict[Any, Any]]] = None,
**kwargs: Any, **kwargs: Any,
) -> CoherenceVectorStore: ) -> CoherenceVectorStore:
raise NotImplementedError msg = "Use `afrom_texts()` instead; sync context is not supported."
raise NotImplementedError(msg)
@classmethod @classmethod
@override @override
@ -500,10 +526,51 @@ class CoherenceVectorStore(VectorStore):
cls, cls,
texts: list[str], texts: list[str],
embedding: Embeddings, embedding: Embeddings,
metadatas: Optional[list[dict]] = None, metadatas: Optional[list[dict[str, Any]]] = None,
**kwargs: Any, **kwargs: Any,
) -> CoherenceVectorStore: ) -> CoherenceVectorStore:
raise NotImplementedError """Asynchronously initialize CoherenceVectorStore from texts and embeddings.
Args:
texts: List of input text strings.
embedding: Embedding function to use.
metadatas: Optional list of metadata dicts corresponding to each text.
kwargs: Additional keyword arguments.
- cache: Required Coherence NamedCache[str, Document] instance.
- ids: Optional list of document IDs.
Returns:
CoherenceVectorStore: An initialized and populated vector store.
Raises:
ValueError: If `cache` is not provided.
"""
# Extract and validate required Coherence cache
cache = kwargs.get("cache")
if cache is None:
msg = "Missing required 'cache' parameter in afrom_texts"
raise ValueError(msg)
# Optionally use caller-supplied document IDs
ids: Optional[list[str]] = kwargs.get("ids")
if ids is not None and len(ids) != len(texts):
msg = "Length of 'ids' must match length of 'texts'"
raise ValueError(msg)
# Create store instance
store = await cls.create(cache, embedding)
# Construct Document objects
documents = []
for i, text in enumerate(texts):
metadata = metadatas[i] if metadatas and i < len(metadatas) else {}
doc_id = ids[i] if ids else str(uuid.uuid4())
documents.append(Document(page_content=text, metadata=metadata, id=doc_id))
# Add documents to vector store
await store.aadd_documents(documents)
return store
@jsonpickle.handlers.register(Document) @jsonpickle.handlers.register(Document)

View File

@ -7,7 +7,7 @@ license = {text = "MIT"}
readme = "README.md" readme = "README.md"
dependencies = [ dependencies = [
"langchain-core>=0.1.20", "langchain-core>=0.1.20",
"coherence-client>=2.0.3" "coherence-client>=2.0.4",
] ]
requires-python = ">=3.9" requires-python = ">=3.9"
@ -28,9 +28,14 @@ test = [
"langchain_huggingface", "langchain_huggingface",
"sentence_transformers" "sentence_transformers"
] ]
docs = [
"jupytext>=1.16",
"nbdoc>=0.0.29",
]
[tool.mypy] [tool.mypy]
strict = "True" strict = "True"
disallow_untyped_defs = "True"
[tool.ruff] [tool.ruff]
target-version = "py39" target-version = "py39"
@ -65,8 +70,23 @@ pydocstyle.convention = "google"
pydocstyle.ignore-var-parameters = true pydocstyle.ignore-var-parameters = true
[tool.pytest.ini_options] [tool.pytest.ini_options]
syncio_default_fixture_loop_scope = "function" asyncio_default_fixture_loop_scope = "function"
testpaths = ["tests"] testpaths = ["tests"]
filterwarnings = [ filterwarnings = [
"ignore::UserWarning:pkg_resources" "ignore::UserWarning:pkg_resources"
] ]
markers = [
"compile: marker used to test compilation-only tests"
]
[tool.ruff.lint.per-file-ignores]
"tests/**" = [
"D", # docstring rules
"ANN", # missing type annotations
"T201", # use of `print`
"S101", # use of `assert`
"E741", # ambiguous variable name like `l`
"RET504", # unnecessary assignment before return
"I001", # import sorting
"UP035" # import from collections.abc instead of typing
]

View File

@ -10,17 +10,22 @@ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from coherence import NamedCache, Session from coherence import NamedCache, Session
from langchain_coherence import CoherenceVectorStore from langchain_coherence import CoherenceVectorStore
@pytest_asyncio.fixture @pytest_asyncio.fixture
async def store() -> AsyncGenerator[CoherenceVectorStore, None]: async def store() -> AsyncGenerator[CoherenceVectorStore, None]:
session: Session = await Session.create() session: Session = await Session.create()
named_cache: NamedCache[str, Document] = await session.get_cache("my-map") named_cache: NamedCache[str, Document] = await session.get_cache("my-map")
embedding :Embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-l6-v2") embedding: Embeddings = HuggingFaceEmbeddings(
cvs :CoherenceVectorStore = await CoherenceVectorStore.create(named_cache,embedding, 384) model_name="sentence-transformers/all-MiniLM-l6-v2"
)
cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
named_cache, embedding
)
yield cvs yield cvs
# await cvs.cache.remove_index(CoherenceVectorStore.VECTOR_EXTRACTOR)
await cvs.cache.destroy() await cvs.cache.destroy()
await session.close() await session.close()
def get_test_data(): def get_test_data():
d1: Document = Document(id="1", page_content="apple") d1: Document = Document(id="1", page_content="apple")
d2: Document = Document(id="2", page_content="orange") d2: Document = Document(id="2", page_content="orange")
@ -36,16 +41,9 @@ def get_test_data():
documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10] documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
return documents return documents
@pytest.mark.asyncio
async def test_coherence_store(store: CoherenceVectorStore):
await run_test_aget_by_id(store)
await run_test_adelete(store)
await run_test_asimilarity_search(store)
await run_test_asimilarity_search_by_vector(store)
await run_test_asimilarity_search_with_score(store)
@pytest.mark.asyncio @pytest.mark.asyncio
async def run_test_aget_by_id(store: CoherenceVectorStore): async def test_aget_by_id(store: CoherenceVectorStore):
print() print()
print(f"=======: {inspect.currentframe().f_code.co_name}") print(f"=======: {inspect.currentframe().f_code.co_name}")
documents = get_test_data() documents = get_test_data()
@ -57,8 +55,9 @@ async def run_test_aget_by_id(store: CoherenceVectorStore):
for e in l: for e in l:
print(e) print(e)
@pytest.mark.asyncio @pytest.mark.asyncio
async def run_test_adelete(store: CoherenceVectorStore): async def test_adelete(store: CoherenceVectorStore):
print() print()
print(f"=======: {inspect.currentframe().f_code.co_name}") print(f"=======: {inspect.currentframe().f_code.co_name}")
documents = get_test_data() documents = get_test_data()
@ -73,8 +72,9 @@ async def run_test_adelete(store: CoherenceVectorStore):
l = await store.aget_by_ids(ids) l = await store.aget_by_ids(ids)
assert len(l) == 0 assert len(l) == 0
@pytest.mark.asyncio @pytest.mark.asyncio
async def run_test_asimilarity_search(store: CoherenceVectorStore): async def test_asimilarity_search(store: CoherenceVectorStore):
print() print()
print(f"=======: {inspect.currentframe().f_code.co_name}") print(f"=======: {inspect.currentframe().f_code.co_name}")
documents = get_test_data() documents = get_test_data()
@ -90,8 +90,9 @@ async def run_test_asimilarity_search(store: CoherenceVectorStore):
for e in result: for e in result:
print(e) print(e)
@pytest.mark.asyncio @pytest.mark.asyncio
async def run_test_asimilarity_search_by_vector(store: CoherenceVectorStore): async def test_asimilarity_search_by_vector(store: CoherenceVectorStore):
print() print()
print(f"=======: {inspect.currentframe().f_code.co_name}") print(f"=======: {inspect.currentframe().f_code.co_name}")
documents = get_test_data() documents = get_test_data()
@ -107,8 +108,9 @@ async def run_test_asimilarity_search_by_vector(store: CoherenceVectorStore):
for e in result: for e in result:
print(e) print(e)
@pytest.mark.asyncio @pytest.mark.asyncio
async def run_test_asimilarity_search_with_score(store: CoherenceVectorStore): async def test_asimilarity_search_with_score(store: CoherenceVectorStore):
print() print()
print(f"=======: {inspect.currentframe().f_code.co_name}") print(f"=======: {inspect.currentframe().f_code.co_name}")
documents = get_test_data() documents = get_test_data()
@ -123,3 +125,29 @@ async def run_test_asimilarity_search_with_score(store: CoherenceVectorStore):
print("====") print("====")
for e in result: for e in result:
print(e) print(e)
@pytest.mark.asyncio
async def test_afrom_texts():
    """Build a store with ``afrom_texts`` and read the documents back by id."""
    session = await Session.create()
    try:
        cache = await session.get_cache("test-map-async")
        try:
            embedding = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-l6-v2"
            )
            texts = ["apple", "banana"]
            metadatas = [{"cat": "fruit"}, {"cat": "fruit"}]
            ids = ["id1", "id2"]

            store = await CoherenceVectorStore.afrom_texts(
                texts=texts,
                embedding=embedding,
                cache=cache,
                metadatas=metadatas,
                ids=ids,
            )

            results = await store.aget_by_ids(ids)
            assert len(results) == 2
        finally:
            # Drop the cache so entries written by this test are not left
            # behind for later runs (same teardown the `store` fixture uses).
            await cache.destroy()
    finally:
        await session.close()

View File

@ -0,0 +1,6 @@
import pytest


@pytest.mark.compile
def test_placeholder() -> None:
    """No-op test carrying the ``compile`` marker.

    It exists so the integration-test module can be compiled without
    running any real tests.
    """

File diff suppressed because it is too large Load Diff