Added documentation, license

2025-08-12 14:23:58 +00:00 · 2025-07-25 13:27:37 -07:00 · 2025-07-25 13:27:37 -07:00 · fd0e46691f
commit fd0e46691f
parent 841149bffa
13 changed files with 2157 additions and 215 deletions
--- a/libs/partners/coherence/.gitignore
+++ b/libs/partners/coherence/.gitignore
@ -27,6 +27,9 @@ coverage.xml
 # Logs
 *.log
 # OCA
 .oca
 # OS
 .DS_Store
 Thumbs.db
--- a/libs/partners/coherence/LICENSE
+++ b/libs/partners/coherence/LICENSE
@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2025 LangChain, Inc.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/libs/partners/coherence/Makefile
+++ b/libs/partners/coherence/Makefile
@ -1,19 +1,68 @@
-.PHONY: install lint format test clean
+# Makefile for LangChain-Coherence Integration
 .PHONY: install update-dev update-core lock sync lint format fix check test clean docs-ipynb readme-ipynb create-ipynb help
 # Paths to common tools (adjust if using .uv or other env)
 # All of them point into the project-local .venv directory, so the recipes
 # below do not depend on whatever happens to be first on PATH.
 # NOTE(review): PYTHON and PIP are not referenced by any recipe visible in
 # this Makefile — confirm they are still needed.
 PYTHON := .venv/bin/python
 PIP := .venv/bin/pip
 RUFF := .venv/bin/ruff
 PYTEST := .venv/bin/pytest
 MYPY := .venv/bin/mypy
 install:
-	uv pip install -e .[lint,typing,test]
+	@echo "🔧 Installing all dependencies..."
 	uv venv
 	uv pip install -e .[lint,typing,test,docs]
-lint:
+update-dev:
-	ruff check langchain_coherence tests
+	@echo "🔄 Updating development dependencies..."
 	uv pip install -e .[lint,typing,test,docs] --upgrade
 # Upgrade just the runtime packages langchain-core and coherence-client.
 update-core:
 	@echo "🔄 Updating core dependencies..."
 	uv pip install --upgrade langchain-core coherence-client
 # Resolve pyproject.toml and write the pinned result to uv.lock.
 # (`uv pip compile` needs an explicit source file and prints to stdout, so it
 # never produced uv.lock; `uv lock` is the command that maintains that file.)
 lock:
 	@echo "🔐 Locking all dependencies to uv.lock..."
 	uv lock
 # Install exactly what uv.lock pins, including every optional extra.
 # --frozen stops uv from re-resolving or rewriting the lockfile. uv.lock is
 # not a requirements file, so `uv pip install -r uv.lock` cannot read it.
 sync:
 	@echo "📦 Syncing dependencies from uv.lock..."
 	uv sync --frozen --all-extras
 # `lint` has no recipe of its own; it only triggers `check`.
 lint: check
 # Run Ruff over the package and the tests, then mypy over the package only.
 check:
 	@echo "🔍 Running linter and type checker..."
 	$(RUFF) check langchain_coherence tests
 	$(MYPY) --explicit-package-bases langchain_coherence
 format:
-	ruff format langchain_coherence tests
+	@echo "🎨 Formatting code with Ruff..."
 	$(RUFF) format langchain_coherence tests
-fix: format lint
+fix:
 	@echo "🔧 Fixing lint issues..."
 	$(MAKE) format
 	$(RUFF) check langchain_coherence tests --fix
 test:
-	pytest
+	@echo "🧪 Running tests..."
 	$(PYTEST)
 clean:
-	rm -rf .pytest_cache .mypy_cache .ruff_cache .uv __pycache__ *.egg-info build dist
+	@echo "🧹 Cleaning build/test artifacts..."
 	rm -rf .pytest_cache .mypy_cache .ruff_cache .venv .uv __pycache__ *.egg-info dist build
 # Print a one-line summary for each target defined in this Makefile.
 # Keep this list in step with the rules above when targets are added.
 help:
 	@echo "🛠 Available Make targets:"
 	@echo "  install       - Install all dependencies into .venv"
 	@echo "  update-dev    - Upgrade dev dependencies (ruff, pytest, etc.)"
 	@echo "  update-core   - Upgrade core runtime deps (langchain-core, coherence-client)"
 	@echo "  lock          - Generate uv.lock with pinned versions"
 	@echo "  sync          - Install from uv.lock (repeatable builds)"
 	@echo "  lint          - Run linter and mypy"
 	@echo "  check         - Run linter and mypy (same as lint)"
 	@echo "  format        - Format code with Ruff"
 	@echo "  fix           - Autoformat and fix issues"
 	@echo "  test          - Run all tests"
 	@echo "  clean         - Remove temp and build files"
 	@echo "  help          - Show this help"
--- a/libs/partners/coherence/README.md
+++ b/libs/partners/coherence/README.md
@ -10,6 +10,94 @@ pip install langchain_coherence
 ## Usage
-```python
+Before using LangChain's `CoherenceVectorStore`, you must ensure that a Coherence server ([Coherence CE](https://github.com/oracle/coherence) 25.03+ or [Oracle Coherence](https://www.oracle.com/java/coherence/) 14.1.2+) is running.
-from langchain_coherence import CoherenceVectorStore
+
 For local development, we recommend using the Coherence CE container image:
 ```bash
 docker run -d -p 1408:1408 ghcr.io/oracle/coherence-ce:25.03.2
 ```
 ### Adding and retrieving Documents
 ```python
 import asyncio
 from langchain_coherence import CoherenceVectorStore
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
 from langchain_huggingface.embeddings import HuggingFaceEmbeddings
 from coherence import NamedMap, Session
 async def do_run():
    session: Session = await Session.create()
    try:
        named_map: NamedMap[str, Document] = await session.get_map("my-map")
        embedding :Embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2")
        # this embedding generates vectors of dimension 384
        # (pass that value to `cvs.add_index(384)` if you want an HNSW index)
        cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
            named_map,embedding)
        d1 :Document = Document(id="1", page_content="apple")
        d2 :Document = Document(id="2", page_content="orange")
        documents = [d1, d2]
        await cvs.aadd_documents(documents)
        ids = [doc.id for doc in documents]
        l = await cvs.aget_by_ids(ids)
        assert len(l) == len(ids)
        print("====")
        for e in l:
            print(e)
    finally:
        await session.close()
 asyncio.run(do_run())
 ```
 ### SimilaritySearch on Documents
 ```python
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
 from langchain_huggingface.embeddings import HuggingFaceEmbeddings
 from coherence import NamedMap, Session
 from langchain_coherence import CoherenceVectorStore
 def test_data():
    d1 :Document = Document(id="1", page_content="apple")
    d2 :Document = Document(id="2", page_content="orange")
    d3 :Document = Document(id="3", page_content="tiger")
    d4 :Document = Document(id="4", page_content="cat")
    d5 :Document = Document(id="5", page_content="dog")
    d6 :Document = Document(id="6", page_content="fox")
    d7 :Document = Document(id="7", page_content="pear")
    d8 :Document = Document(id="8", page_content="banana")
    d9 :Document = Document(id="9", page_content="plum")
    d10 :Document = Document(id="10", page_content="lion")
    documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
    return documents
 async def test_asimilarity_search():
    documents = test_data()
    session: Session = await Session.create()
    try:
        named_map: NamedMap[str, Document] = await session.get_map("my-map")
        embedding :Embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-l6-v2")
        # this embedding generates vectors of dimension 384
        # (pass that value to `cvs.add_index(384)` if you want an HNSW index)
        cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
                                            named_map,embedding)
        await cvs.aadd_documents(documents)
        ids = [doc.id for doc in documents]
        l = await cvs.aget_by_ids(ids)
        assert len(l) == 10
        result = await cvs.asimilarity_search("fruit")
        assert len(result) == 4
        print("====")
        for e in result:
            print(e)
    finally:
        await session.close()
 ```
--- a/libs/partners/coherence/coherence.iml
+++ b/libs/partners/coherence/coherence.iml
@ -1,11 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="WEB_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
 </module>
--- a/libs/partners/coherence/langchain_coherence/init.py
+++ b/libs/partners/coherence/langchain_coherence/init.py
@ -1,5 +1,9 @@
 """Public interface for the LangChain Coherence integration."""
 from __future__ import annotations
 __version__ = "0.0.1"
-from .coherence_store import CoherenceVectorStore
+from langchain_coherence.coherence_store import CoherenceVectorStore
 __all__ = ["CoherenceVectorStore"]
--- a/libs/partners/coherence/langchain_coherence/coherence_store.py
+++ b/libs/partners/coherence/langchain_coherence/coherence_store.py
@ -44,7 +44,6 @@ from coherence.serialization import (  # type: ignore[import-untyped]
    JSONSerializer,
    SerializerRegistry,
 )
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
 from langchain_core.vectorstores import VectorStore
@ -79,7 +78,7 @@ class CoherenceVectorStore(VectorStore):
                            model_name="sentence-transformers/all-MiniLM-l6-v2")
                # this embedding generates vectors of dimension 384
                cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
-                                                    named_map,embedding,384)
+                                                    named_map,embedding)
                d1 :Document = Document(id="1", page_content="apple")
                d2 :Document = Document(id="2", page_content="orange")
                documents = [d1, d2]
@ -111,7 +110,7 @@ class CoherenceVectorStore(VectorStore):
                            model_name="sentence-transformers/all-MiniLM-l6-v2")
                # this embedding generates vectors of dimension 384
                cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
-                                                    named_map,embedding,384)
+                                                    named_map,embedding)
                d1 :Document = Document(id="1", page_content="apple")
                d2 :Document = Document(id="2", page_content="orange")
                documents = [d1, d2]
@ -156,7 +155,7 @@ class CoherenceVectorStore(VectorStore):
                            model_name="sentence-transformers/all-MiniLM-l6-v2")
                    # this embedding generates vectors of dimension 384
                    cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
-                                                        named_map,embedding,384)
+                                                        named_map,embedding)
                    await cvs.aadd_documents(documents)
                    ids = [doc.id for doc in documents]
                    l = await cvs.aget_by_ids(ids)
@ -204,7 +203,7 @@ class CoherenceVectorStore(VectorStore):
                            model_name="sentence-transformers/all-MiniLM-l6-v2")
                    # this embedding generates vectors of dimension 384
                    cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
-                                                        named_map,embedding,384)
+                                                        named_map,embedding)
                    await cvs.aadd_documents(documents)
                    ids = [doc.id for doc in documents]
                    l = await cvs.aget_by_ids(ids)
@ -238,21 +237,34 @@ class CoherenceVectorStore(VectorStore):
        self.embedding = embedding
    @staticmethod
-    async def create(coherence_cache: NamedCache, embedding: Embeddings,
+    async def create(
-        dimensions: int
+        coherence_cache: NamedCache,
        embedding: Embeddings,
    ) -> CoherenceVectorStore:
        """Create an instance of CoherenceVectorStore.
        Args:
            coherence_cache: Coherence NamedCache to use
            embedding: embedding function to use.
        """
        coh_store: CoherenceVectorStore = CoherenceVectorStore(
            coherence_cache, embedding
        )
        return coh_store
    async def add_index(self, dimensions: int) -> None:
        """Creates index on the Coherence cache on the VECTOR_FIELD.
        Args:
            dimensions: size of the vector created by the embedding function
        """
-        coh_store: CoherenceVectorStore = CoherenceVectorStore(coherence_cache,
+        await self.cache.add_index(
-                                                               embedding)
+            HnswIndex(CoherenceVectorStore.VECTOR_EXTRACTOR, dimensions)
-        await coherence_cache.add_index(HnswIndex(
+        )
-            CoherenceVectorStore.VECTOR_EXTRACTOR, dimensions))
+
-        return coh_store
+    async def remove_index(self) -> None:
        """Removes index on the Coherence cache on the VECTOR_FIELD."""
        await self.cache.remove_index(CoherenceVectorStore.VECTOR_EXTRACTOR)
    @property
    @override
@ -269,7 +281,19 @@ class CoherenceVectorStore(VectorStore):
    async def aadd_documents(
        self, documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any
    ) -> list[str]:
-        """Add documents to the store."""
+        """Async run more documents through the embeddings and add to the vectorstore.
        Args:
            documents: Documents to add to the vectorstore.
            ids: Optional list of IDs of the documents.
            kwargs: Additional keyword arguments.
        Returns:
            List of IDs of the added texts.
        Raises:
            ValueError: If the number of IDs does not match the number of documents.
        """
        texts = [doc.page_content for doc in documents]
        vectors = await self.embedding.aembed_documents(texts)
@ -334,7 +358,8 @@ class CoherenceVectorStore(VectorStore):
            # Efficient parallel delete
            await asyncio.gather(*(self.cache.remove(i) for i in ids))
-    def _parse_coherence_kwargs(self, **kwargs: Any
+    def _parse_coherence_kwargs(
        self, **kwargs: Any
    ) -> tuple[DistanceAlgorithm, Filter, bool]:
        allowed_keys = {"algorithm", "filter", "brute_force"}
        extra_keys = set(kwargs) - allowed_keys
@ -489,10 +514,11 @@ class CoherenceVectorStore(VectorStore):
        cls,
        texts: list[str],
        embedding: Embeddings,
-        metadatas: Optional[list[dict]] = None,
+        metadatas: Optional[list[dict[Any, Any]]] = None,
        **kwargs: Any,
    ) -> CoherenceVectorStore:
-        raise NotImplementedError
+        msg = "Use `afrom_texts()` instead; sync context is not supported."
        raise NotImplementedError(msg)
    @classmethod
    @override
@ -500,10 +526,51 @@ class CoherenceVectorStore(VectorStore):
        cls,
        texts: list[str],
        embedding: Embeddings,
-        metadatas: Optional[list[dict]] = None,
+        metadatas: Optional[list[dict[str, Any]]] = None,
        **kwargs: Any,
    ) -> CoherenceVectorStore:
-        raise NotImplementedError
+        """Asynchronously initialize CoherenceVectorStore from texts and embeddings.
        Args:
            texts: List of input text strings.
            embedding: Embedding function to use.
            metadatas: Optional list of metadata dicts corresponding to each text.
            kwargs: Additional keyword arguments.
                - cache: Required Coherence NamedCache[str, Document] instance.
                - ids: Optional list of document IDs.
        Returns:
            CoherenceVectorStore: An initialized and populated vector store.
        Raises:
            ValueError: If `cache` is not provided.
        """
        # Extract and validate required Coherence cache
        cache = kwargs.get("cache")
        if cache is None:
            msg = "Missing required 'cache' parameter in afrom_texts"
            raise ValueError(msg)
        # Optionally use caller-supplied document IDs
        ids: Optional[list[str]] = kwargs.get("ids")
        if ids is not None and len(ids) != len(texts):
            msg = "Length of 'ids' must match length of 'texts'"
            raise ValueError(msg)
        # Create store instance
        store = await cls.create(cache, embedding)
        # Construct Document objects
        documents = []
        for i, text in enumerate(texts):
            metadata = metadatas[i] if metadatas and i < len(metadatas) else {}
            doc_id = ids[i] if ids else str(uuid.uuid4())
            documents.append(Document(page_content=text, metadata=metadata, id=doc_id))
        # Add documents to vector store
        await store.aadd_documents(documents)
        return store
@jsonpickle.handlers.register(Document)
--- a/libs/partners/coherence/pyproject.toml
+++ b/libs/partners/coherence/pyproject.toml
@ -7,7 +7,7 @@ license = {text = "MIT"}
 readme = "README.md"
 dependencies = [
    "langchain-core>=0.1.20",
-    "coherence-client>=2.0.3"
+    "coherence-client>=2.0.4",
 ]
 requires-python = ">=3.9"
@ -28,9 +28,14 @@ test = [
    "langchain_huggingface",
    "sentence_transformers"
 ]
 docs = [
    "jupytext>=1.16",
    "nbdoc>=0.0.29",
 ]
 [tool.mypy]
 strict = "True"
 disallow_untyped_defs = "True"
 [tool.ruff]
 target-version = "py39"
@ -65,8 +70,23 @@ pydocstyle.convention = "google"
 pydocstyle.ignore-var-parameters = true
 [tool.pytest.ini_options]
-syncio_default_fixture_loop_scope = "function"
+asyncio_default_fixture_loop_scope = "function"
 testpaths = ["tests"]
 filterwarnings = [
    "ignore::UserWarning:pkg_resources"
 ]
 markers = [
    "compile: marker used to test compilation-only tests"
 ]
 [tool.ruff.lint.per-file-ignores]
 "tests/**" = [
    "D",      # docstring rules
    "ANN",    # missing type annotations
    "T201",   # use of `print`
    "S101",   # use of `assert`
    "E741",   # ambiguous variable name like `l`
    "RET504", # unnecessary assignment before return
    "I001",   # import sorting
    "UP035"   # import from collections.abc instead of typing
 ]
--- a/libs/partners/coherence/tests/integration_tests/init.py
+++ b/libs/partners/coherence/tests/integration_tests/init.py
--- a/libs/partners/coherence/tests/integration_tests/test_coherence_store.py
+++ b/libs/partners/coherence/tests/integration_tests/test_coherence_store.py
@ -10,17 +10,22 @@ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
 from coherence import NamedCache, Session
 from langchain_coherence import CoherenceVectorStore
@pytest_asyncio.fixture
 async def store() -> AsyncGenerator[CoherenceVectorStore, None]:
    session: Session = await Session.create()
    named_cache: NamedCache[str, Document] = await session.get_cache("my-map")
-    embedding :Embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-l6-v2")
+    embedding: Embeddings = HuggingFaceEmbeddings(
-    cvs :CoherenceVectorStore = await CoherenceVectorStore.create(named_cache,embedding, 384)
+        model_name="sentence-transformers/all-MiniLM-l6-v2"
    )
    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
        named_cache, embedding
    )
    yield cvs
    # await cvs.cache.remove_index(CoherenceVectorStore.VECTOR_EXTRACTOR)
    await cvs.cache.destroy()
    await session.close()
 def get_test_data():
    d1: Document = Document(id="1", page_content="apple")
    d2: Document = Document(id="2", page_content="orange")
@ -36,16 +41,9 @@ def get_test_data():
    documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
    return documents
@pytest.mark.asyncio
 async def test_coherence_store(store: CoherenceVectorStore):
    await run_test_aget_by_id(store)
    await run_test_adelete(store)
    await run_test_asimilarity_search(store)
    await run_test_asimilarity_search_by_vector(store)
    await run_test_asimilarity_search_with_score(store)
@pytest.mark.asyncio
-async def run_test_aget_by_id(store: CoherenceVectorStore):
+async def test_aget_by_id(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
@ -57,8 +55,9 @@ async def run_test_aget_by_id(store: CoherenceVectorStore):
    for e in l:
        print(e)
@pytest.mark.asyncio
-async def run_test_adelete(store: CoherenceVectorStore):
+async def test_adelete(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
@ -73,8 +72,9 @@ async def run_test_adelete(store: CoherenceVectorStore):
    l = await store.aget_by_ids(ids)
    assert len(l) == 0
@pytest.mark.asyncio
-async def run_test_asimilarity_search(store: CoherenceVectorStore):
+async def test_asimilarity_search(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
@ -90,8 +90,9 @@ async def run_test_asimilarity_search(store: CoherenceVectorStore):
    for e in result:
        print(e)
@pytest.mark.asyncio
-async def run_test_asimilarity_search_by_vector(store: CoherenceVectorStore):
+async def test_asimilarity_search_by_vector(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
@ -107,8 +108,9 @@ async def run_test_asimilarity_search_by_vector(store: CoherenceVectorStore):
    for e in result:
        print(e)
@pytest.mark.asyncio
-async def run_test_asimilarity_search_with_score(store: CoherenceVectorStore):
+async def test_asimilarity_search_with_score(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
@ -123,3 +125,29 @@ async def run_test_asimilarity_search_with_score(store: CoherenceVectorStore):
    print("====")
    for e in result:
        print(e)
@pytest.mark.asyncio
 async def test_afrom_texts():
    session = await Session.create()
    try:
        cache = await session.get_cache("test-map-async")
        embedding = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2"
        )
        texts = ["apple", "banana"]
        metadatas = [{"cat": "fruit"}, {"cat": "fruit"}]
        ids = ["id1", "id2"]
        store = await CoherenceVectorStore.afrom_texts(
            texts=texts,
            embedding=embedding,
            cache=cache,
            metadatas=metadatas,
            ids=ids,
        )
        results = await store.aget_by_ids(ids)
        assert len(results) == 2
    finally:
        await session.close()
--- a/libs/partners/coherence/tests/integration_tests/test_compile.py
+++ b/libs/partners/coherence/tests/integration_tests/test_compile.py
@ -0,0 +1,6 @@
 import pytest
@pytest.mark.compile
 def test_placeholder() -> None:
    """Used for compiling integration tests without running any real tests."""
--- a/libs/partners/coherence/tests/unit_tests/init.py
+++ b/libs/partners/coherence/tests/unit_tests/init.py
--- a/libs/partners/coherence/uv.lock
+++ b/libs/partners/coherence/uv.lock