mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-08 14:31:55 +00:00
core[minor]: add upsert, streaming_upsert, aupsert, astreaming_upsert methods to the VectorStore abstraction (#23774)
This PR rolls out part of the new proposed interface for vectorstores (https://github.com/langchain-ai/langchain/pull/23544) to existing store implementations. The PR makes the following changes: 1. Adds standard upsert, streaming_upsert, aupsert, astreaming_upsert methods to the vectorstore. 2. Updates `add_texts` and `aadd_texts` to be non-required, with a default implementation that delegates to `upsert` and `aupsert` if those have been implemented. The original `add_texts` and `aadd_texts` methods are problematic as they spread object-specific information across the document and `**kwargs` (e.g., ids are not a part of the document). 3. Adds a default implementation to `add_documents` and `aadd_documents` that delegates to `upsert` and `aupsert` respectively. 4. Adds standard unit tests to verify that a given vectorstore implements a correct read/write API. A downside of this implementation is that it creates `upsert` with a very similar signature to `add_documents`. The reason for introducing `upsert` is to: * Remove any ambiguities about what information is allowed in `kwargs`. Specifically, `kwargs` should only be used for information common to all indexed data (e.g., indexing timeout). * Allow inheriting from an anticipated generalized interface for indexing that will allow indexing `BaseMedia` (i.e., allow making a vectorstore for images/audio, etc.). `add_documents` can be deprecated in the future in favor of `upsert` to make sure that users have a single correct way of indexing content. --------- Co-authored-by: ccurme <chester.curme@gmail.com>
This commit is contained in:
31
libs/core/tests/unit_tests/utils/test_aiter.py
Normal file
31
libs/core/tests/unit_tests/utils/test_aiter.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from typing import AsyncIterator, List
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_core.utils.aiter import abatch_iterate
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input_size, input_iterable, expected_output",
    [
        # Last batch is shorter when the input length is not a multiple of size.
        (2, [1, 2, 3, 4, 5], [[1, 2], [3, 4], [5]]),
        (3, [10, 20, 30, 40, 50], [[10, 20, 30], [40, 50]]),
        # A batch size of one wraps every item in its own list.
        (1, [100, 200, 300], [[100], [200], [300]]),
        # An empty input produces no batches at all.
        (4, [], []),
    ],
)
async def test_abatch_iterate(
    input_size: int, input_iterable: List[int], expected_output: List[List[int]]
) -> None:
    """Check that ``abatch_iterate`` groups an async stream into batches.

    Args:
        input_size: Batch size passed as the first argument to
            ``abatch_iterate``.
        input_iterable: Items fed to ``abatch_iterate`` through an async
            generator.
        expected_output: The batches, in order, that the returned async
            iterator is expected to yield.
    """

    async def _to_async_iterable(iterable: List[int]) -> AsyncIterator[int]:
        # Adapt a plain list into an async generator so it can be consumed
        # by ``abatch_iterate``.
        for item in iterable:
            yield item

    iterator_ = abatch_iterate(input_size, _to_async_iterable(input_iterable))

    # The call itself must return an async iterator, not a materialized list.
    assert isinstance(iterator_, AsyncIterator)

    output = [el async for el in iterator_]
    assert output == expected_output
|
@@ -6,6 +6,8 @@ EXPECTED_ALL = [
|
||||
"convert_to_secret_str",
|
||||
"formatter",
|
||||
"get_bolded_text",
|
||||
"abatch_iterate",
|
||||
"batch_iterate",
|
||||
"get_color_mapping",
|
||||
"get_colored_text",
|
||||
"get_pydantic_field_names",
|
||||
|
Reference in New Issue
Block a user